diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/infiniband/ulp/opa_vnic | |
parent | Initial commit. (diff) | |
download | linux-upstream.tar.xz linux-upstream.zip |
Adding upstream version 6.1.76.upstream/6.1.76upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/infiniband/ulp/opa_vnic')
-rw-r--r-- | drivers/infiniband/ulp/opa_vnic/Kconfig | 9 | ||||
-rw-r--r-- | drivers/infiniband/ulp/opa_vnic/Makefile | 9 | ||||
-rw-r--r-- | drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c | 513 | ||||
-rw-r--r-- | drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h | 524 | ||||
-rw-r--r-- | drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c | 185 | ||||
-rw-r--r-- | drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h | 329 | ||||
-rw-r--r-- | drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c | 400 | ||||
-rw-r--r-- | drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 1056 | ||||
-rw-r--r-- | drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | 390 |
9 files changed, 3415 insertions, 0 deletions
diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig b/drivers/infiniband/ulp/opa_vnic/Kconfig new file mode 100644 index 000000000..4d43d055f --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +config INFINIBAND_OPA_VNIC + tristate "Cornelis OPX VNIC support" + depends on X86_64 && INFINIBAND + help + This is Omni-Path Express (OPX) Virtual Network Interface Controller (VNIC) + driver for Ethernet over Omni-Path feature. It implements the HW + independent VNIC functionality. It interfaces with Linux stack for + data path and IB MAD for the control path. diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile b/drivers/infiniband/ulp/opa_vnic/Makefile new file mode 100644 index 000000000..196183817 --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Makefile - Cornelis Omni-Path Express Virtual Network Controller driver +# Copyright(c) 2017, Intel Corporation. +# Copyright(c) 2021, Cornelis Networks. +# +obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o + +opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \ + opa_vnic_vema.o opa_vnic_vema_iface.o diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c new file mode 100644 index 000000000..31cd36141 --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c @@ -0,0 +1,513 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains OPA VNIC encapsulation/decapsulation function. + */ + +#include <linux/if_ether.h> +#include <linux/if_vlan.h> + +#include "opa_vnic_internal.h" + +/* OPA 16B Header fields */ +#define OPA_16B_LID_MASK 0xFFFFFull +#define OPA_16B_SLID_HIGH_SHFT 8 +#define OPA_16B_SLID_MASK 0xF00ull +#define OPA_16B_DLID_MASK 0xF000ull +#define OPA_16B_DLID_HIGH_SHFT 12 +#define OPA_16B_LEN_SHFT 20 +#define OPA_16B_SC_SHFT 20 +#define OPA_16B_RC_SHFT 25 +#define OPA_16B_PKEY_SHFT 16 + +#define OPA_VNIC_L4_HDR_SHFT 16 + +/* L2+L4 hdr len is 20 bytes (5 quad words) */ +#define OPA_VNIC_HDR_QW_LEN 5 + +static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len, + u16 pkey, u16 entropy, u8 sc, u8 rc, + u8 l4_type, u16 l4_hdr) +{ + /* h[1]: LT=1, 16B L2=10 */ + u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0}; + + h[2] = l4_type; + h[3] = entropy; + h[4] = l4_hdr << OPA_VNIC_L4_HDR_SHFT; + + /* Extract and set 4 upper bits and 20 lower bits of the lids */ + h[0] |= (slid & OPA_16B_LID_MASK); + h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK); + + h[1] |= (dlid & OPA_16B_LID_MASK); + h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK); + + h[0] |= (len << OPA_16B_LEN_SHFT); + h[1] |= (rc << OPA_16B_RC_SHFT); + h[1] |= (sc << OPA_16B_SC_SHFT); + h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT); + + memcpy(hdr, h, OPA_VNIC_HDR_LEN); +} + +/* + * Using a simple hash table for mac table implementation with the last octet + * of mac address as a key. + */ +static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl) +{ + struct opa_vnic_mac_tbl_node *node; + struct hlist_node *tmp; + int bkt; + + if (!mactbl) + return; + + vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) { + hash_del(&node->hlist); + kfree(node); + } + kfree(mactbl); +} + +static struct hlist_head *opa_vnic_alloc_mac_tbl(void) +{ + u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE; + struct hlist_head *mactbl; + + mactbl = kzalloc(size, GFP_KERNEL); + if (!mactbl) + return ERR_PTR(-ENOMEM); + + vnic_hash_init(mactbl); + return mactbl; +} + +/* opa_vnic_release_mac_tbl - empty and free the mac table */ +void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter) +{ + struct hlist_head *mactbl; + + mutex_lock(&adapter->mactbl_lock); + mactbl = rcu_access_pointer(adapter->mactbl); + rcu_assign_pointer(adapter->mactbl, NULL); + synchronize_rcu(); + opa_vnic_free_mac_tbl(mactbl); + adapter->info.vport.mac_tbl_digest = 0; + mutex_unlock(&adapter->mactbl_lock); +} + +/* + * opa_vnic_query_mac_tbl - query the mac table for a section + * + * This function implements query of specific function of the mac table. + * The function also expects the requested range to be valid. + */ +void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter, + struct opa_veswport_mactable *tbl) +{ + struct opa_vnic_mac_tbl_node *node; + struct hlist_head *mactbl; + int bkt; + u16 loffset, lnum_entries; + + rcu_read_lock(); + mactbl = rcu_dereference(adapter->mactbl); + if (!mactbl) + goto get_mac_done; + + loffset = be16_to_cpu(tbl->offset); + lnum_entries = be16_to_cpu(tbl->num_entries); + + vnic_hash_for_each(mactbl, bkt, node, hlist) { + struct __opa_vnic_mactable_entry *nentry = &node->entry; + struct opa_veswport_mactable_entry *entry; + + if ((node->index < loffset) || + (node->index >= (loffset + lnum_entries))) + continue; + + /* populate entry in the tbl corresponding to the index */ + entry = &tbl->tbl_entries[node->index - loffset]; + memcpy(entry->mac_addr, nentry->mac_addr, + ARRAY_SIZE(entry->mac_addr)); + memcpy(entry->mac_addr_mask, nentry->mac_addr_mask, + ARRAY_SIZE(entry->mac_addr_mask)); + entry->dlid_sd = cpu_to_be32(nentry->dlid_sd); + } + tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest); +get_mac_done: + rcu_read_unlock(); +} + +/* + * opa_vnic_update_mac_tbl - update mac table section + * + * This function updates the specified section of the mac table. + * The procedure includes following steps. + * - Allocate a new mac (hash) table. + * - Add the specified entries to the new table. + * (except the ones that are requested to be deleted). + * - Add all the other entries from the old mac table. + * - If there is a failure, free the new table and return. + * - Switch to the new table. + * - Free the old table and return. + * + * The function also expects the requested range to be valid. + */ +int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter, + struct opa_veswport_mactable *tbl) +{ + struct opa_vnic_mac_tbl_node *node, *new_node; + struct hlist_head *new_mactbl, *old_mactbl; + int i, bkt, rc = 0; + u8 key; + u16 loffset, lnum_entries; + + mutex_lock(&adapter->mactbl_lock); + /* allocate new mac table */ + new_mactbl = opa_vnic_alloc_mac_tbl(); + if (IS_ERR(new_mactbl)) { + mutex_unlock(&adapter->mactbl_lock); + return PTR_ERR(new_mactbl); + } + + loffset = be16_to_cpu(tbl->offset); + lnum_entries = be16_to_cpu(tbl->num_entries); + + /* add updated entries to the new mac table */ + for (i = 0; i < lnum_entries; i++) { + struct __opa_vnic_mactable_entry *nentry; + struct opa_veswport_mactable_entry *entry = + &tbl->tbl_entries[i]; + u8 *mac_addr = entry->mac_addr; + u8 empty_mac[ETH_ALEN] = { 0 }; + + v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n", + loffset + i, mac_addr[0], mac_addr[1], mac_addr[2], + mac_addr[3], mac_addr[4], mac_addr[5], + entry->dlid_sd); + + /* if the entry is being removed, do not add it */ + if (!memcmp(mac_addr, empty_mac, ARRAY_SIZE(empty_mac))) + continue; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) { + rc = -ENOMEM; + goto updt_done; + } + + node->index = loffset + i; + nentry = &node->entry; + memcpy(nentry->mac_addr, entry->mac_addr, + ARRAY_SIZE(nentry->mac_addr)); + memcpy(nentry->mac_addr_mask, entry->mac_addr_mask, + ARRAY_SIZE(nentry->mac_addr_mask)); + nentry->dlid_sd = be32_to_cpu(entry->dlid_sd); + key = node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX]; + vnic_hash_add(new_mactbl, &node->hlist, key); + } + + /* add other entries from current mac table to new mac table */ + old_mactbl = rcu_access_pointer(adapter->mactbl); + if (!old_mactbl) + goto switch_tbl; + + vnic_hash_for_each(old_mactbl, bkt, node, hlist) { + if ((node->index >= loffset) && + (node->index < (loffset + lnum_entries))) + continue; + + new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) { + rc = -ENOMEM; + goto updt_done; + } + + new_node->index = node->index; + memcpy(&new_node->entry, &node->entry, sizeof(node->entry)); + key = new_node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX]; + vnic_hash_add(new_mactbl, &new_node->hlist, key); + } + +switch_tbl: + /* switch to new table */ + rcu_assign_pointer(adapter->mactbl, new_mactbl); + synchronize_rcu(); + + adapter->info.vport.mac_tbl_digest = be32_to_cpu(tbl->mac_tbl_digest); +updt_done: + /* upon failure, free the new table; otherwise, free the old table */ + if (rc) + opa_vnic_free_mac_tbl(new_mactbl); + else + opa_vnic_free_mac_tbl(old_mactbl); + + mutex_unlock(&adapter->mactbl_lock); + return rc; +} + +/* opa_vnic_chk_mac_tbl - check mac table for dlid */ +static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter *adapter, + struct ethhdr *mac_hdr) +{ + struct opa_vnic_mac_tbl_node *node; + struct hlist_head *mactbl; + u32 dlid = 0; + u8 key; + + rcu_read_lock(); + mactbl = rcu_dereference(adapter->mactbl); + if (unlikely(!mactbl)) + goto chk_done; + + key = mac_hdr->h_dest[OPA_VNIC_MAC_HASH_IDX]; + vnic_hash_for_each_possible(mactbl, node, hlist, key) { + struct __opa_vnic_mactable_entry *entry = &node->entry; + + /* if related to source mac, skip */ + if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry->dlid_sd))) + continue; + + if (!memcmp(node->entry.mac_addr, mac_hdr->h_dest, + ARRAY_SIZE(node->entry.mac_addr))) { + /* mac address found */ + dlid = OPA_VNIC_DLID_SD_GET_DLID(node->entry.dlid_sd); + break; + } + } + +chk_done: + rcu_read_unlock(); + return dlid; +} + +/* opa_vnic_get_dlid - find and return the DLID */ +static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter, + struct sk_buff *skb, u8 def_port) +{ + struct __opa_veswport_info *info = &adapter->info; + struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); + u32 dlid; + + dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr); + if (dlid) + return dlid; + + if (is_multicast_ether_addr(mac_hdr->h_dest)) { + dlid = info->vesw.u_mcast_dlid; + } else { + if (is_local_ether_addr(mac_hdr->h_dest)) { + dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) | + ((uint32_t)mac_hdr->h_dest[4] << 8) | + mac_hdr->h_dest[3]; + if (unlikely(!dlid)) + v_warn("Null dlid in MAC address\n"); + } else if (def_port != OPA_VNIC_INVALID_PORT) { + if (def_port < OPA_VESW_MAX_NUM_DEF_PORT) + dlid = info->vesw.u_ucast_dlid[def_port]; + } + } + + return dlid; +} + +/* opa_vnic_get_sc - return the service class */ +static u8 opa_vnic_get_sc(struct __opa_veswport_info *info, + struct sk_buff *skb) +{ + struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); + u16 vlan_tci; + u8 sc; + + if (!__vlan_get_tag(skb, &vlan_tci)) { + u8 pcp = OPA_VNIC_VLAN_PCP(vlan_tci); + + if (is_multicast_ether_addr(mac_hdr->h_dest)) + sc = info->vport.pcp_to_sc_mc[pcp]; + else + sc = info->vport.pcp_to_sc_uc[pcp]; + } else { + if (is_multicast_ether_addr(mac_hdr->h_dest)) + sc = info->vport.non_vlan_sc_mc; + else + sc = info->vport.non_vlan_sc_uc; + } + + return sc; +} + +u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb) +{ + struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); + struct __opa_veswport_info *info = &adapter->info; + u8 vl; + + if (skb_vlan_tag_present(skb)) { + u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT; + + if (is_multicast_ether_addr(mac_hdr->h_dest)) + vl = info->vport.pcp_to_vl_mc[pcp]; + else + vl = info->vport.pcp_to_vl_uc[pcp]; + } else { + if (is_multicast_ether_addr(mac_hdr->h_dest)) + vl = info->vport.non_vlan_vl_mc; + else + vl = info->vport.non_vlan_vl_uc; + } + + return vl; +} + +/* opa_vnic_get_rc - return the routing control */ +static u8 opa_vnic_get_rc(struct __opa_veswport_info *info, + struct sk_buff *skb) +{ + u8 proto, rout_ctrl; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IPV6): + proto = ipv6_hdr(skb)->nexthdr; + if (proto == IPPROTO_TCP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV6_TCP); + else if (proto == IPPROTO_UDP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV6_UDP); + else + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, IPV6); + break; + case htons(ETH_P_IP): + proto = ip_hdr(skb)->protocol; + if (proto == IPPROTO_TCP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV4_TCP); + else if (proto == IPPROTO_UDP) + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, + IPV4_UDP); + else + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, IPV4); + break; + default: + rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, DEFAULT); + } + + return rout_ctrl; +} + +/* opa_vnic_calc_entropy - calculate the packet entropy */ +u8 opa_vnic_calc_entropy(struct sk_buff *skb) +{ + u32 hash = skb_get_hash(skb); + + /* store XOR of all bytes in lower 8 bits */ + hash ^= hash >> 8; + hash ^= hash >> 16; + + /* return lower 8 bits as entropy */ + return (u8)(hash & 0xFF); +} + +/* opa_vnic_get_def_port - get default port based on entropy */ +static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter, + u8 entropy) +{ + u8 flow_id; + + /* Add the upper and lower 4-bits of entropy to get the flow id */ + flow_id = ((entropy & 0xf) + (entropy >> 4)); + return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)]; +} + +/* Calculate packet length including OPA header, crc and padding */ +static inline int opa_vnic_wire_length(struct sk_buff *skb) +{ + u32 pad_len; + + /* padding for 8 bytes size alignment */ + pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7; + pad_len += OPA_VNIC_ICRC_TAIL_LEN; + + return (skb->len + pad_len) >> 3; +} + +/* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */ +void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb) +{ + struct __opa_veswport_info *info = &adapter->info; + struct opa_vnic_skb_mdata *mdata; + u8 def_port, sc, rc, entropy, *hdr; + u16 len, l4_hdr; + u32 dlid; + + hdr = skb_push(skb, OPA_VNIC_HDR_LEN); + + entropy = opa_vnic_calc_entropy(skb); + def_port = opa_vnic_get_def_port(adapter, entropy); + len = opa_vnic_wire_length(skb); + dlid = opa_vnic_get_dlid(adapter, skb, def_port); + sc = opa_vnic_get_sc(info, skb); + rc = opa_vnic_get_rc(info, skb); + l4_hdr = info->vesw.vesw_id; + + mdata = skb_push(skb, sizeof(*mdata)); + mdata->vl = opa_vnic_get_vl(adapter, skb); + mdata->entropy = entropy; + mdata->flags = 0; + if (unlikely(!dlid)) { + mdata->flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR; + return; + } + + opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len, + info->vesw.pkey, entropy, sc, rc, + OPA_VNIC_L4_ETHR, l4_hdr); +} diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h new file mode 100644 index 000000000..012fc27c5 --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -0,0 +1,524 @@ +#ifndef _OPA_VNIC_ENCAP_H +#define _OPA_VNIC_ENCAP_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains all OPA VNIC declaration required for encapsulation + * and decapsulation of Ethernet packets + */ + +#include <linux/types.h> +#include <rdma/ib_mad.h> + +/* EMA class version */ +#define OPA_EMA_CLASS_VERSION 0x80 + +/* + * Define the Intel vendor management class for OPA + * ETHERNET MANAGEMENT + */ +#define OPA_MGMT_CLASS_INTEL_EMA 0x34 + +/* EM attribute IDs */ +#define OPA_EM_ATTR_CLASS_PORT_INFO 0x0001 +#define OPA_EM_ATTR_VESWPORT_INFO 0x0011 +#define OPA_EM_ATTR_VESWPORT_MAC_ENTRIES 0x0012 +#define OPA_EM_ATTR_IFACE_UCAST_MACS 0x0013 +#define OPA_EM_ATTR_IFACE_MCAST_MACS 0x0014 +#define OPA_EM_ATTR_DELETE_VESW 0x0015 +#define OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS 0x0020 +#define OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022 + +/* VNIC configured and operational state values */ +#define OPA_VNIC_STATE_DROP_ALL 0x1 +#define OPA_VNIC_STATE_FORWARDING 0x3 + +#define OPA_VESW_MAX_NUM_DEF_PORT 16 +#define OPA_VNIC_MAX_NUM_PCP 8 + +#define OPA_VNIC_EMA_DATA (OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR) + +/* Defines for vendor specific notice(trap) attributes */ +#define OPA_INTEL_EMA_NOTICE_TYPE_INFO 0x04 + +/* INTEL OUI */ +#define INTEL_OUI_1 0x00 +#define INTEL_OUI_2 0x06 +#define INTEL_OUI_3 0x6a + +/* Trap opcodes sent from VNIC */ +#define OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1 +#define OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2 +#define OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3 + +#define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd) (!!((dlid_sd) & 0x20)) +#define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd) ((dlid_sd) >> 8) + +/* VNIC Ethernet link status */ +#define OPA_VNIC_ETH_LINK_UP 1 +#define OPA_VNIC_ETH_LINK_DOWN 2 + +/* routing control */ +#define OPA_VNIC_ENCAP_RC_DEFAULT 0 +#define OPA_VNIC_ENCAP_RC_IPV4 4 +#define OPA_VNIC_ENCAP_RC_IPV4_UDP 8 +#define OPA_VNIC_ENCAP_RC_IPV4_TCP 12 +#define OPA_VNIC_ENCAP_RC_IPV6 16 +#define OPA_VNIC_ENCAP_RC_IPV6_TCP 20 +#define OPA_VNIC_ENCAP_RC_IPV6_UDP 24 + +#define OPA_VNIC_ENCAP_RC_EXT(w, b) (((w) >> OPA_VNIC_ENCAP_RC_ ## b) & 0x7) + +/** + * struct opa_vesw_info - OPA vnic switch information + * @fabric_id: 10-bit fabric id + * @vesw_id: 12-bit virtual ethernet switch id + * @rsvd0: reserved bytes + * @def_port_mask: bitmask of default ports + * @rsvd1: reserved bytes + * @pkey: partition key + * @rsvd2: reserved bytes + * @u_mcast_dlid: unknown multicast dlid + * @u_ucast_dlid: array of unknown unicast dlids + * @rsvd3: reserved bytes + * @rc: routing control + * @eth_mtu: Ethernet MTU + * @rsvd4: reserved bytes + */ +struct opa_vesw_info { + __be16 fabric_id; + __be16 vesw_id; + + u8 rsvd0[6]; + __be16 def_port_mask; + + u8 rsvd1[2]; + __be16 pkey; + + u8 rsvd2[4]; + __be32 u_mcast_dlid; + __be32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT]; + + __be32 rc; + + u8 rsvd3[56]; + __be16 eth_mtu; + u8 rsvd4[2]; +} __packed; + +/** + * struct opa_per_veswport_info - OPA vnic per port information + * @port_num: port number + * @eth_link_status: current ethernet link state + * @rsvd0: reserved bytes + * @base_mac_addr: base mac address + * @config_state: configured port state + * @oper_state: operational port state + * @max_mac_tbl_ent: max number of mac table entries + * @max_smac_ent: max smac entries in mac table + * @mac_tbl_digest: mac table digest + * @rsvd1: reserved bytes + * @encap_slid: base slid for the port + * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets + * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets + * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets + * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets + * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets + * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets + * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets + * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets + * @rsvd2: reserved bytes + * @uc_macs_gen_count: generation count for unicast macs list + * @mc_macs_gen_count: generation count for multicast macs list + * @rsvd3: reserved bytes + */ +struct opa_per_veswport_info { + __be32 port_num; + + u8 eth_link_status; + u8 rsvd0[3]; + + u8 base_mac_addr[ETH_ALEN]; + u8 config_state; + u8 oper_state; + + __be16 max_mac_tbl_ent; + __be16 max_smac_ent; + __be32 mac_tbl_digest; + u8 rsvd1[4]; + + __be32 encap_slid; + + u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP]; + + u8 non_vlan_sc_uc; + u8 non_vlan_vl_uc; + u8 non_vlan_sc_mc; + u8 non_vlan_vl_mc; + + u8 rsvd2[48]; + + __be16 uc_macs_gen_count; + __be16 mc_macs_gen_count; + + u8 rsvd3[8]; +} __packed; + +/** + * struct opa_veswport_info - OPA vnic port information + * @vesw: OPA vnic switch information + * @vport: OPA vnic per port information + * + * On host, each of the virtual ethernet ports belongs + * to a different virtual ethernet switches. + */ +struct opa_veswport_info { + struct opa_vesw_info vesw; + struct opa_per_veswport_info vport; +}; + +/** + * struct opa_veswport_mactable_entry - single entry in the forwarding table + * @mac_addr: MAC address + * @mac_addr_mask: MAC address bit mask + * @dlid_sd: Matching DLID and side data + * + * On the host each virtual ethernet port will have + * a forwarding table. These tables are used to + * map a MAC to a LID and other data. For more + * details see struct opa_veswport_mactable_entries. + * This is the structure of a single mactable entry + */ +struct opa_veswport_mactable_entry { + u8 mac_addr[ETH_ALEN]; + u8 mac_addr_mask[ETH_ALEN]; + __be32 dlid_sd; +} __packed; + +/** + * struct opa_veswport_mactable - Forwarding table array + * @offset: mac table starting offset + * @num_entries: Number of entries to get or set + * @mac_tbl_digest: mac table digest + * @tbl_entries: Array of table entries + * + * The EM sends down this structure in a MAD indicating + * the starting offset in the forwarding table that this + * entry is to be loaded into and the number of entries + * that that this MAD instance contains + * The mac_tbl_digest has been added to this MAD structure. It will be set by + * the EM and it will be used by the EM to check if there are any + * discrepancies with this value and the value + * maintained by the EM in the case of VNIC port being deleted or unloaded + * A new instantiation of a VNIC will always have a value of zero. + * This value is stored as part of the vnic adapter structure and will be + * accessed by the GET and SET routines for both the mactable entries and the + * veswport info. + */ +struct opa_veswport_mactable { + __be16 offset; + __be16 num_entries; + __be32 mac_tbl_digest; + struct opa_veswport_mactable_entry tbl_entries[]; +} __packed; + +/** + * struct opa_veswport_summary_counters - summary counters + * @vp_instance: vport instance on the OPA port + * @vesw_id: virtual ethernet switch id + * @veswport_num: virtual ethernet switch port number + * @tx_errors: transmit errors + * @rx_errors: receive errors + * @tx_packets: transmit packets + * @rx_packets: receive packets + * @tx_bytes: transmit bytes + * @rx_bytes: receive bytes + * @tx_unicast: unicast packets transmitted + * @tx_mcastbcast: multicast/broadcast packets transmitted + * @tx_untagged: non-vlan packets transmitted + * @tx_vlan: vlan packets transmitted + * @tx_64_size: transmit packet length is 64 bytes + * @tx_65_127: transmit packet length is >=65 and < 127 bytes + * @tx_128_255: transmit packet length is >=128 and < 255 bytes + * @tx_256_511: transmit packet length is >=256 and < 511 bytes + * @tx_512_1023: transmit packet length is >=512 and < 1023 bytes + * @tx_1024_1518: transmit packet length is >=1024 and < 1518 bytes + * @tx_1519_max: transmit packet length >= 1519 bytes + * @rx_unicast: unicast packets received + * @rx_mcastbcast: multicast/broadcast packets received + * @rx_untagged: non-vlan packets received + * @rx_vlan: vlan packets received + * @rx_64_size: received packet length is 64 bytes + * @rx_65_127: received packet length is >=65 and < 127 bytes + * @rx_128_255: received packet length is >=128 and < 255 bytes + * @rx_256_511: received packet length is >=256 and < 511 bytes + * @rx_512_1023: received packet length is >=512 and < 1023 bytes + * @rx_1024_1518: received packet length is >=1024 and < 1518 bytes + * @rx_1519_max: received packet length >= 1519 bytes + * @reserved: reserved bytes + * + * All the above are counters of corresponding conditions. + */ +struct opa_veswport_summary_counters { + __be16 vp_instance; + __be16 vesw_id; + __be32 veswport_num; + + __be64 tx_errors; + __be64 rx_errors; + __be64 tx_packets; + __be64 rx_packets; + __be64 tx_bytes; + __be64 rx_bytes; + + __be64 tx_unicast; + __be64 tx_mcastbcast; + + __be64 tx_untagged; + __be64 tx_vlan; + + __be64 tx_64_size; + __be64 tx_65_127; + __be64 tx_128_255; + __be64 tx_256_511; + __be64 tx_512_1023; + __be64 tx_1024_1518; + __be64 tx_1519_max; + + __be64 rx_unicast; + __be64 rx_mcastbcast; + + __be64 rx_untagged; + __be64 rx_vlan; + + __be64 rx_64_size; + __be64 rx_65_127; + __be64 rx_128_255; + __be64 rx_256_511; + __be64 rx_512_1023; + __be64 rx_1024_1518; + __be64 rx_1519_max; + + __be64 reserved[16]; +} __packed; + +/** + * struct opa_veswport_error_counters - error counters + * @vp_instance: vport instance on the OPA port + * @vesw_id: virtual ethernet switch id + * @veswport_num: virtual ethernet switch port number + * @tx_errors: transmit errors + * @rx_errors: receive errors + * @rsvd0: reserved bytes + * @tx_smac_filt: smac filter errors + * @rsvd1: reserved bytes + * @rsvd2: reserved bytes + * @rsvd3: reserved bytes + * @tx_dlid_zero: transmit packets with invalid dlid + * @rsvd4: reserved bytes + * @tx_logic: other transmit errors + * @rsvd5: reserved bytes + * @tx_drop_state: packet tansmission in non-forward port state + * @rx_bad_veswid: received packet with invalid vesw id + * @rsvd6: reserved bytes + * @rx_runt: received ethernet packet with length < 64 bytes + * @rx_oversize: received ethernet packet with length > MTU size + * @rsvd7: reserved bytes + * @rx_eth_down: received packets when interface is down + * @rx_drop_state: received packets in non-forwarding port state + * @rx_logic: other receive errors + * @rsvd8: reserved bytes + * @rsvd9: reserved bytes + * + * All the above are counters of corresponding error conditions. + */ +struct opa_veswport_error_counters { + __be16 vp_instance; + __be16 vesw_id; + __be32 veswport_num; + + __be64 tx_errors; + __be64 rx_errors; + + __be64 rsvd0; + __be64 tx_smac_filt; + __be64 rsvd1; + __be64 rsvd2; + __be64 rsvd3; + __be64 tx_dlid_zero; + __be64 rsvd4; + __be64 tx_logic; + __be64 rsvd5; + __be64 tx_drop_state; + + __be64 rx_bad_veswid; + __be64 rsvd6; + __be64 rx_runt; + __be64 rx_oversize; + __be64 rsvd7; + __be64 rx_eth_down; + __be64 rx_drop_state; + __be64 rx_logic; + __be64 rsvd8; + + __be64 rsvd9[16]; +} __packed; + +/** + * struct opa_veswport_trap - Trap message sent to EM by VNIC + * @fabric_id: 10 bit fabric id + * @veswid: 12 bit virtual ethernet switch id + * @veswportnum: logical port number on the Virtual switch + * @opaportnum: physical port num (redundant on host) + * @veswportindex: switch port index on opa port 0 based + * @opcode: operation + * @reserved: 32 bit for alignment + * + * The VNIC will send trap messages to the Ethernet manager to + * inform it about changes to the VNIC config, behaviour etc. + * This is the format of the trap payload. + */ +struct opa_veswport_trap { + __be16 fabric_id; + __be16 veswid; + __be32 veswportnum; + __be16 opaportnum; + u8 veswportindex; + u8 opcode; + __be32 reserved; +} __packed; + +/** + * struct opa_vnic_iface_mac_entry - single entry in the mac list + * @mac_addr: MAC address + */ +struct opa_vnic_iface_mac_entry { + u8 mac_addr[ETH_ALEN]; +}; + +/** + * struct opa_veswport_iface_macs - Msg to set globally administered MAC + * @start_idx: position of first entry (0 based) + * @num_macs_in_msg: number of MACs in this message + * @tot_macs_in_lst: The total number of MACs the agent has + * @gen_count: gen_count to indicate change + * @entry: The mac list entry + * + * Same attribute IDS and attribute modifiers as in locally administered + * addresses used to set globally administered addresses + */ +struct opa_veswport_iface_macs { + __be16 start_idx; + __be16 num_macs_in_msg; + __be16 tot_macs_in_lst; + __be16 gen_count; + struct opa_vnic_iface_mac_entry entry[]; +} __packed; + +/** + * struct opa_vnic_vema_mad - Generic VEMA MAD + * @mad_hdr: Generic MAD header + * @rmpp_hdr: RMPP header for vendor specific MADs + * @reserved: reserved bytes + * @oui: Unique org identifier + * @data: MAD data + */ +struct opa_vnic_vema_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 reserved; + u8 oui[3]; + u8 data[OPA_VNIC_EMA_DATA]; +}; + +/** + * struct opa_vnic_notice_attr - Generic Notice MAD + * @gen_type: Generic/Specific bit and type of notice + * @oui_1: Vendor ID byte 1 + * @oui_2: Vendor ID byte 2 + * @oui_3: Vendor ID byte 3 + * @trap_num: Trap number + * @toggle_count: Notice toggle bit and count value + * @issuer_lid: Trap issuer's lid + * @reserved: reserved bytes + * @issuer_gid: Issuer GID (only if Report method) + * @raw_data: Trap message body + */ +struct opa_vnic_notice_attr { + u8 gen_type; + u8 oui_1; + u8 oui_2; + u8 oui_3; + __be16 trap_num; + __be16 toggle_count; + __be32 issuer_lid; + __be32 reserved; + u8 issuer_gid[16]; + u8 raw_data[64]; +} __packed; + +/** + * struct opa_vnic_vema_mad_trap - Generic VEMA MAD Trap + * @mad_hdr: Generic MAD header + * @rmpp_hdr: RMPP header for vendor specific MADs + * @reserved: reserved bytes + * @oui: Unique org identifier + * @notice: Notice structure + */ +struct opa_vnic_vema_mad_trap { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 reserved; + u8 oui[3]; + struct opa_vnic_notice_attr notice; +}; + +#endif /* _OPA_VNIC_ENCAP_H */ diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c new file mode 100644 index 000000000..29b3d8fce --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c @@ -0,0 +1,185 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains OPA VNIC ethtool functions + */ + +#include <linux/ethtool.h> + +#include "opa_vnic_internal.h" + +enum {NETDEV_STATS, VNIC_STATS}; + +struct vnic_stats { + char stat_string[ETH_GSTRING_LEN]; + struct { + int sizeof_stat; + int stat_offset; + }; +}; + +#define VNIC_STAT(m) { sizeof_field(struct opa_vnic_stats, m), \ + offsetof(struct opa_vnic_stats, m) } + +static struct vnic_stats vnic_gstrings_stats[] = { + /* NETDEV stats */ + {"rx_packets", VNIC_STAT(netstats.rx_packets)}, + {"tx_packets", VNIC_STAT(netstats.tx_packets)}, + {"rx_bytes", VNIC_STAT(netstats.rx_bytes)}, + {"tx_bytes", VNIC_STAT(netstats.tx_bytes)}, + {"rx_errors", VNIC_STAT(netstats.rx_errors)}, + {"tx_errors", VNIC_STAT(netstats.tx_errors)}, + {"rx_dropped", VNIC_STAT(netstats.rx_dropped)}, + {"tx_dropped", VNIC_STAT(netstats.tx_dropped)}, + + /* SUMMARY counters */ + {"tx_unicast", VNIC_STAT(tx_grp.unicast)}, + {"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)}, + {"tx_untagged", VNIC_STAT(tx_grp.untagged)}, + {"tx_vlan", VNIC_STAT(tx_grp.vlan)}, + + {"tx_64_size", VNIC_STAT(tx_grp.s_64)}, + {"tx_65_127", VNIC_STAT(tx_grp.s_65_127)}, + {"tx_128_255", VNIC_STAT(tx_grp.s_128_255)}, + {"tx_256_511", VNIC_STAT(tx_grp.s_256_511)}, + {"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)}, + {"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)}, + {"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)}, + + {"rx_unicast", VNIC_STAT(rx_grp.unicast)}, + {"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)}, + {"rx_untagged", VNIC_STAT(rx_grp.untagged)}, + {"rx_vlan", VNIC_STAT(rx_grp.vlan)}, + + {"rx_64_size", VNIC_STAT(rx_grp.s_64)}, + {"rx_65_127", VNIC_STAT(rx_grp.s_65_127)}, + {"rx_128_255", VNIC_STAT(rx_grp.s_128_255)}, + {"rx_256_511", VNIC_STAT(rx_grp.s_256_511)}, + {"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)}, + {"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)}, + {"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)}, + + /* ERROR counters */ + {"rx_fifo_errors", VNIC_STAT(netstats.rx_fifo_errors)}, + {"rx_length_errors", VNIC_STAT(netstats.rx_length_errors)}, + + {"tx_fifo_errors", VNIC_STAT(netstats.tx_fifo_errors)}, + {"tx_carrier_errors", VNIC_STAT(netstats.tx_carrier_errors)}, + + {"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)}, + {"tx_drop_state", VNIC_STAT(tx_drop_state)}, + {"rx_drop_state", VNIC_STAT(rx_drop_state)}, + {"rx_oversize", VNIC_STAT(rx_oversize)}, + {"rx_runt", VNIC_STAT(rx_runt)}, +}; + +#define VNIC_STATS_LEN ARRAY_SIZE(vnic_gstrings_stats) + +/* vnic_get_drvinfo - get driver info */ +static void vnic_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + strscpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver)); + strscpy(drvinfo->bus_info, dev_name(netdev->dev.parent), + sizeof(drvinfo->bus_info)); +} + +/* vnic_get_sset_count - get string set count */ +static int vnic_get_sset_count(struct net_device *netdev, int sset) +{ + return (sset == ETH_SS_STATS) ? VNIC_STATS_LEN : -EOPNOTSUPP; +} + +/* vnic_get_ethtool_stats - get statistics */ +static void vnic_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + struct opa_vnic_stats vstats; + int i; + + memset(&vstats, 0, sizeof(vstats)); + spin_lock(&adapter->stats_lock); + adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats); + spin_unlock(&adapter->stats_lock); + for (i = 0; i < VNIC_STATS_LEN; i++) { + char *p = (char *)&vstats + vnic_gstrings_stats[i].stat_offset; + + data[i] = (vnic_gstrings_stats[i].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } +} + +/* vnic_get_strings - get strings */ +static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < VNIC_STATS_LEN; i++) + memcpy(data + i * ETH_GSTRING_LEN, + vnic_gstrings_stats[i].stat_string, + ETH_GSTRING_LEN); +} + +/* ethtool ops */ +static const struct ethtool_ops opa_vnic_ethtool_ops = { + .get_drvinfo = vnic_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = vnic_get_strings, + .get_sset_count = vnic_get_sset_count, + .get_ethtool_stats = vnic_get_ethtool_stats, +}; + +/* opa_vnic_set_ethtool_ops - set ethtool ops */ +void opa_vnic_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &opa_vnic_ethtool_ops; +} diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h new file mode 100644 index 000000000..dd942dd64 --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h @@ -0,0 +1,329 @@ +#ifndef _OPA_VNIC_INTERNAL_H +#define _OPA_VNIC_INTERNAL_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains OPA VNIC driver internal declarations + */ + +#include <linux/bitops.h> +#include <linux/etherdevice.h> +#include <linux/hashtable.h> +#include <linux/sizes.h> +#include <rdma/opa_vnic.h> + +#include "opa_vnic_encap.h" + +#define OPA_VNIC_VLAN_PCP(vlan_tci) \ + (((vlan_tci) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT) + +/* Flow to default port redirection table size */ +#define OPA_VNIC_FLOW_TBL_SIZE 32 + +/* Invalid port number */ +#define OPA_VNIC_INVALID_PORT 0xff + +struct opa_vnic_adapter; + +/* + * struct __opa_vesw_info - OPA vnic virtual switch info + * + * Same as opa_vesw_info without bitwise attribute. + */ +struct __opa_vesw_info { + u16 fabric_id; + u16 vesw_id; + + u8 rsvd0[6]; + u16 def_port_mask; + + u8 rsvd1[2]; + u16 pkey; + + u8 rsvd2[4]; + u32 u_mcast_dlid; + u32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT]; + + u32 rc; + + u8 rsvd3[56]; + u16 eth_mtu; + u8 rsvd4[2]; +} __packed; + +/* + * struct __opa_per_veswport_info - OPA vnic per port info + * + * Same as opa_per_veswport_info without bitwise attribute. + */ +struct __opa_per_veswport_info { + u32 port_num; + + u8 eth_link_status; + u8 rsvd0[3]; + + u8 base_mac_addr[ETH_ALEN]; + u8 config_state; + u8 oper_state; + + u16 max_mac_tbl_ent; + u16 max_smac_ent; + u32 mac_tbl_digest; + u8 rsvd1[4]; + + u32 encap_slid; + + u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP]; + u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP]; + + u8 non_vlan_sc_uc; + u8 non_vlan_vl_uc; + u8 non_vlan_sc_mc; + u8 non_vlan_vl_mc; + + u8 rsvd2[48]; + + u16 uc_macs_gen_count; + u16 mc_macs_gen_count; + + u8 rsvd3[8]; +} __packed; + +/* + * struct __opa_veswport_info - OPA vnic port info + * + * Same as opa_veswport_info without bitwise attribute. + */ +struct __opa_veswport_info { + struct __opa_vesw_info vesw; + struct __opa_per_veswport_info vport; +}; + +/* + * struct __opa_veswport_trap - OPA vnic trap info + * + * Same as opa_veswport_trap without bitwise attribute. + */ +struct __opa_veswport_trap { + u16 fabric_id; + u16 veswid; + u32 veswportnum; + u16 opaportnum; + u8 veswportindex; + u8 opcode; + u32 reserved; +} __packed; + +/** + * struct opa_vnic_ctrl_port - OPA virtual NIC control port + * @ibdev: pointer to ib device + * @ops: opa vnic control operations + * @num_ports: number of opa ports + */ +struct opa_vnic_ctrl_port { + struct ib_device *ibdev; + struct opa_vnic_ctrl_ops *ops; + u8 num_ports; +}; + +/** + * struct opa_vnic_adapter - OPA VNIC netdev private data structure + * @netdev: pointer to associated netdev + * @ibdev: ib device + * @cport: pointer to opa vnic control port + * @rn_ops: rdma netdev's net_device_ops + * @port_num: OPA port number + * @vport_num: vesw port number + * @lock: adapter lock + * @info: virtual ethernet switch port information + * @vema_mac_addr: mac address configured by vema + * @umac_hash: unicast maclist hash + * @mmac_hash: multicast maclist hash + * @mactbl: hash table of MAC entries + * @mactbl_lock: mac table lock + * @stats_lock: statistics lock + * @flow_tbl: flow to default port redirection table + * @trap_timeout: trap timeout + * @trap_count: no. of traps allowed within timeout period + */ +struct opa_vnic_adapter { + struct net_device *netdev; + struct ib_device *ibdev; + struct opa_vnic_ctrl_port *cport; + const struct net_device_ops *rn_ops; + + u8 port_num; + u8 vport_num; + + /* Lock used around concurrent updates to netdev */ + struct mutex lock; + + struct __opa_veswport_info info; + u8 vema_mac_addr[ETH_ALEN]; + u32 umac_hash; + u32 mmac_hash; + struct hlist_head __rcu *mactbl; + + /* Lock used to protect updates to mac table */ + struct mutex mactbl_lock; + + /* Lock used to protect access to vnic counters */ + spinlock_t stats_lock; + + u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE]; + + unsigned long trap_timeout; + u8 trap_count; +}; + +/* Same as opa_veswport_mactable_entry, but without bitwise attribute */ +struct __opa_vnic_mactable_entry { + u8 mac_addr[ETH_ALEN]; + u8 mac_addr_mask[ETH_ALEN]; + u32 dlid_sd; +} __packed; + +/** + * struct opa_vnic_mac_tbl_node - OPA VNIC mac table node + * @hlist: hash list handle + * @index: index of entry in the mac table + * @entry: entry in the table + */ +struct opa_vnic_mac_tbl_node { + struct hlist_node hlist; + u16 index; + struct __opa_vnic_mactable_entry entry; +}; + +#define v_dbg(format, arg...) \ + netdev_dbg(adapter->netdev, format, ## arg) +#define v_err(format, arg...) \ + netdev_err(adapter->netdev, format, ## arg) +#define v_info(format, arg...) \ + netdev_info(adapter->netdev, format, ## arg) +#define v_warn(format, arg...) \ + netdev_warn(adapter->netdev, format, ## arg) + +#define c_err(format, arg...) \ + dev_err(&cport->ibdev->dev, format, ## arg) +#define c_info(format, arg...) \ + dev_info(&cport->ibdev->dev, format, ## arg) +#define c_dbg(format, arg...) \ + dev_dbg(&cport->ibdev->dev, format, ## arg) + +/* The maximum allowed entries in the mac table */ +#define OPA_VNIC_MAC_TBL_MAX_ENTRIES 2048 +/* Limit of smac entries in mac table */ +#define OPA_VNIC_MAX_SMAC_LIMIT 256 + +/* The last octet of the MAC address is used as the key to the hash table */ +#define OPA_VNIC_MAC_HASH_IDX 5 + +/* The VNIC MAC hash table is of size 2^8 */ +#define OPA_VNIC_MAC_TBL_HASH_BITS 8 +#define OPA_VNIC_MAC_TBL_SIZE BIT(OPA_VNIC_MAC_TBL_HASH_BITS) + +/* VNIC HASH MACROS */ +#define vnic_hash_init(hashtable) __hash_init(hashtable, OPA_VNIC_MAC_TBL_SIZE) + +#define vnic_hash_add(hashtable, node, key) \ + hlist_add_head(node, \ + &hashtable[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))]) + +#define vnic_hash_for_each_safe(name, bkt, tmp, obj, member) \ + for ((bkt) = 0, obj = NULL; \ + !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \ + hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) + +#define vnic_hash_for_each_possible(name, obj, member, key) \ + hlist_for_each_entry(obj, \ + &name[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))], member) + +#define vnic_hash_for_each(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; \ + !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \ + hlist_for_each_entry(obj, &name[bkt], member) + +extern char opa_vnic_driver_name[]; + +struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev, + u8 port_num, u8 vport_num); +void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter); +void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb); +u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb); +u8 opa_vnic_calc_entropy(struct sk_buff *skb); +void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter); +void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter); +void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter, + struct opa_veswport_mactable *tbl); +int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter, + struct opa_veswport_mactable *tbl); +void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, + struct opa_veswport_iface_macs *macs); +void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter, + struct opa_veswport_iface_macs *macs); +void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter, + struct opa_veswport_summary_counters *cntrs); +void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter, + struct opa_veswport_error_counters *cntrs); +void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter, + struct opa_vesw_info *info); +void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter, + struct opa_vesw_info *info); +void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter, + struct opa_per_veswport_info *info); +void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter, + struct opa_per_veswport_info *info); +void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event); +void opa_vnic_set_ethtool_ops(struct net_device *netdev); +void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter, + struct __opa_veswport_trap *data, u32 lid); + +#endif /* _OPA_VNIC_INTERNAL_H */ diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c new file mode 100644 index 000000000..071f35711 --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -0,0 +1,400 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains OPA Virtual Network Interface Controller (VNIC) driver + * netdev functionality. + */ + +#include <linux/if_vlan.h> +#include <linux/crc32.h> + +#include "opa_vnic_internal.h" + +#define OPA_TX_TIMEOUT_MS 1000 + +#define OPA_VNIC_SKB_HEADROOM \ + ALIGN((OPA_VNIC_HDR_LEN + OPA_VNIC_SKB_MDATA_LEN), 8) + +/* This function is overloaded for opa_vnic specific implementation */ +static void opa_vnic_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + struct opa_vnic_stats vstats; + + memset(&vstats, 0, sizeof(vstats)); + spin_lock(&adapter->stats_lock); + adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats); + spin_unlock(&adapter->stats_lock); + memcpy(stats, &vstats.netstats, sizeof(*stats)); +} + +/* opa_netdev_start_xmit - transmit function */ +static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + + v_dbg("xmit: queue %d skb len %d\n", skb->queue_mapping, skb->len); + /* pad to ensure mininum ethernet packet length */ + if (unlikely(skb->len < ETH_ZLEN)) { + if (skb_padto(skb, ETH_ZLEN)) + return NETDEV_TX_OK; + + skb_put(skb, ETH_ZLEN - skb->len); + } + + opa_vnic_encap_skb(adapter, skb); + return adapter->rn_ops->ndo_start_xmit(skb, netdev); +} + +static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + struct opa_vnic_skb_mdata *mdata; + int rc; + + /* pass entropy and vl as metadata in skb */ + mdata = skb_push(skb, sizeof(*mdata)); + mdata->entropy = opa_vnic_calc_entropy(skb); + mdata->vl = opa_vnic_get_vl(adapter, skb); + rc = adapter->rn_ops->ndo_select_queue(netdev, skb, sb_dev); + skb_pull(skb, sizeof(*mdata)); + return rc; +} + +static void opa_vnic_update_state(struct opa_vnic_adapter *adapter, bool up) +{ + struct __opa_veswport_info *info = &adapter->info; + + mutex_lock(&adapter->lock); + /* Operational state can only be DROP_ALL or FORWARDING */ + if ((info->vport.config_state == OPA_VNIC_STATE_FORWARDING) && up) { + info->vport.oper_state = OPA_VNIC_STATE_FORWARDING; + info->vport.eth_link_status = OPA_VNIC_ETH_LINK_UP; + } else { + info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL; + info->vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN; + } + + if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) + netif_dormant_off(adapter->netdev); + else + netif_dormant_on(adapter->netdev); + mutex_unlock(&adapter->lock); +} + +/* opa_vnic_process_vema_config - process vema configuration updates */ +void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter) +{ + struct __opa_veswport_info *info = &adapter->info; + struct rdma_netdev *rn = netdev_priv(adapter->netdev); + u8 port_num[OPA_VESW_MAX_NUM_DEF_PORT] = { 0 }; + struct net_device *netdev = adapter->netdev; + u8 i, port_count = 0; + u16 port_mask; + + /* If the base_mac_addr is changed, update the interface mac address */ + if (memcmp(info->vport.base_mac_addr, adapter->vema_mac_addr, + ARRAY_SIZE(info->vport.base_mac_addr))) { + struct sockaddr saddr; + + memcpy(saddr.sa_data, info->vport.base_mac_addr, + ARRAY_SIZE(info->vport.base_mac_addr)); + mutex_lock(&adapter->lock); + eth_commit_mac_addr_change(netdev, &saddr); + memcpy(adapter->vema_mac_addr, + info->vport.base_mac_addr, ETH_ALEN); + mutex_unlock(&adapter->lock); + } + + rn->set_id(netdev, info->vesw.vesw_id); + + /* Handle MTU limit change */ + rtnl_lock(); + netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu, + netdev->min_mtu); + if (netdev->mtu > netdev->max_mtu) + dev_set_mtu(netdev, netdev->max_mtu); + rtnl_unlock(); + + /* Update flow to default port redirection table */ + port_mask = info->vesw.def_port_mask; + for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) { + if (port_mask & 1) + port_num[port_count++] = i; + port_mask >>= 1; + } + + /* + * Build the flow table. Flow table is required when destination LID + * is not available. Up to OPA_VNIC_FLOW_TBL_SIZE flows supported. + * Each flow need a default port number to get its dlid from the + * u_ucast_dlid array. + */ + for (i = 0; i < OPA_VNIC_FLOW_TBL_SIZE; i++) + adapter->flow_tbl[i] = port_count ? port_num[i % port_count] : + OPA_VNIC_INVALID_PORT; + + /* update state */ + opa_vnic_update_state(adapter, !!(netdev->flags & IFF_UP)); +} + +/* + * Set the power on default values in adapter's vema interface structure. + */ +static inline void opa_vnic_set_pod_values(struct opa_vnic_adapter *adapter) +{ + adapter->info.vport.max_mac_tbl_ent = OPA_VNIC_MAC_TBL_MAX_ENTRIES; + adapter->info.vport.max_smac_ent = OPA_VNIC_MAX_SMAC_LIMIT; + adapter->info.vport.config_state = OPA_VNIC_STATE_DROP_ALL; + adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN; + adapter->info.vesw.eth_mtu = ETH_DATA_LEN; +} + +/* opa_vnic_set_mac_addr - change mac address */ +static int opa_vnic_set_mac_addr(struct net_device *netdev, void *addr) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + struct sockaddr *sa = addr; + int rc; + + if (!memcmp(netdev->dev_addr, sa->sa_data, ETH_ALEN)) + return 0; + + mutex_lock(&adapter->lock); + rc = eth_mac_addr(netdev, addr); + mutex_unlock(&adapter->lock); + if (rc) + return rc; + + adapter->info.vport.uc_macs_gen_count++; + opa_vnic_vema_report_event(adapter, + OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE); + return 0; +} + +/* + * opa_vnic_mac_send_event - post event on possible mac list exchange + * Send trap when digest from uc/mc mac list differs from previous run. + * Digest is evaluated similar to how cksum does. + */ +static void opa_vnic_mac_send_event(struct net_device *netdev, u8 event) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + struct netdev_hw_addr *ha; + struct netdev_hw_addr_list *hw_list; + u32 *ref_crc; + u32 l, crc = 0; + + switch (event) { + case OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE: + hw_list = &netdev->uc; + adapter->info.vport.uc_macs_gen_count++; + ref_crc = &adapter->umac_hash; + break; + case OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE: + hw_list = &netdev->mc; + adapter->info.vport.mc_macs_gen_count++; + ref_crc = &adapter->mmac_hash; + break; + default: + return; + } + netdev_hw_addr_list_for_each(ha, hw_list) { + crc = crc32_le(crc, ha->addr, ETH_ALEN); + } + l = netdev_hw_addr_list_count(hw_list) * ETH_ALEN; + crc = ~crc32_le(crc, (void *)&l, sizeof(l)); + + if (crc != *ref_crc) { + *ref_crc = crc; + opa_vnic_vema_report_event(adapter, event); + } +} + +/* opa_vnic_set_rx_mode - handle uc/mc mac list change */ +static void opa_vnic_set_rx_mode(struct net_device *netdev) +{ + opa_vnic_mac_send_event(netdev, + OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE); + + opa_vnic_mac_send_event(netdev, + OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE); +} + +/* opa_netdev_open - activate network interface */ +static int opa_netdev_open(struct net_device *netdev) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + int rc; + + rc = adapter->rn_ops->ndo_open(adapter->netdev); + if (rc) { + v_dbg("open failed %d\n", rc); + return rc; + } + + /* Update status and send trap */ + opa_vnic_update_state(adapter, true); + opa_vnic_vema_report_event(adapter, + OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE); + return 0; +} + +/* opa_netdev_close - disable network interface */ +static int opa_netdev_close(struct net_device *netdev) +{ + struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev); + int rc; + + rc = adapter->rn_ops->ndo_stop(adapter->netdev); + if (rc) { + v_dbg("close failed %d\n", rc); + return rc; + } + + /* Update status and send trap */ + opa_vnic_update_state(adapter, false); + opa_vnic_vema_report_event(adapter, + OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE); + return 0; +} + +/* netdev ops */ +static const struct net_device_ops opa_netdev_ops = { + .ndo_open = opa_netdev_open, + .ndo_stop = opa_netdev_close, + .ndo_start_xmit = opa_netdev_start_xmit, + .ndo_get_stats64 = opa_vnic_get_stats64, + .ndo_set_rx_mode = opa_vnic_set_rx_mode, + .ndo_select_queue = opa_vnic_select_queue, + .ndo_set_mac_address = opa_vnic_set_mac_addr, +}; + +/* opa_vnic_add_netdev - create vnic netdev interface */ +struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev, + u8 port_num, u8 vport_num) +{ + struct opa_vnic_adapter *adapter; + struct net_device *netdev; + struct rdma_netdev *rn; + int rc; + + netdev = ibdev->ops.alloc_rdma_netdev(ibdev, port_num, + RDMA_NETDEV_OPA_VNIC, + "veth%d", NET_NAME_UNKNOWN, + ether_setup); + if (!netdev) + return ERR_PTR(-ENOMEM); + else if (IS_ERR(netdev)) + return ERR_CAST(netdev); + + rn = netdev_priv(netdev); + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) { + rc = -ENOMEM; + goto adapter_err; + } + + rn->clnt_priv = adapter; + rn->hca = ibdev; + rn->port_num = port_num; + adapter->netdev = netdev; + adapter->ibdev = ibdev; + adapter->port_num = port_num; + adapter->vport_num = vport_num; + adapter->rn_ops = netdev->netdev_ops; + + netdev->netdev_ops = &opa_netdev_ops; + netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netdev->hard_header_len += OPA_VNIC_SKB_HEADROOM; + mutex_init(&adapter->lock); + mutex_init(&adapter->mactbl_lock); + spin_lock_init(&adapter->stats_lock); + + SET_NETDEV_DEV(netdev, ibdev->dev.parent); + + opa_vnic_set_ethtool_ops(netdev); + + opa_vnic_set_pod_values(adapter); + + rc = register_netdev(netdev); + if (rc) + goto netdev_err; + + netif_carrier_off(netdev); + netif_dormant_on(netdev); + v_info("initialized\n"); + + return adapter; +netdev_err: + mutex_destroy(&adapter->lock); + mutex_destroy(&adapter->mactbl_lock); + kfree(adapter); +adapter_err: + rn->free_rdma_netdev(netdev); + + return ERR_PTR(rc); +} + +/* opa_vnic_rem_netdev - remove vnic netdev interface */ +void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct rdma_netdev *rn = netdev_priv(netdev); + + v_info("removing\n"); + unregister_netdev(netdev); + opa_vnic_release_mac_tbl(adapter); + mutex_destroy(&adapter->lock); + mutex_destroy(&adapter->mactbl_lock); + kfree(adapter); + rn->free_rdma_netdev(netdev); +} diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c new file mode 100644 index 000000000..21c6cea8b --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -0,0 +1,1056 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2021 Cornelis Networks. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains OPX Virtual Network Interface Controller (VNIC) + * Ethernet Management Agent (EMA) driver + */ + +#include <linux/module.h> +#include <linux/xarray.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_verbs.h> +#include <rdma/opa_smi.h> +#include <rdma/opa_port_info.h> + +#include "opa_vnic_internal.h" + +char opa_vnic_driver_name[] = "opa_vnic"; + +/* + * The trap service level is kept in bits 3 to 7 in the trap_sl_rsvd + * field in the class port info MAD. + */ +#define GET_TRAP_SL_FROM_CLASS_PORT_INFO(x) (((x) >> 3) & 0x1f) + +/* Cap trap bursts to a reasonable limit good for normal cases */ +#define OPA_VNIC_TRAP_BURST_LIMIT 4 + +/* + * VNIC trap limit timeout. + * Inverse of cap2_mask response time out (1.0737 secs) = 0.9 + * secs approx IB spec 13.4.6.2.1 PortInfoSubnetTimeout and + * 13.4.9 Traps. + */ +#define OPA_VNIC_TRAP_TIMEOUT ((4096 * (1UL << 18)) / 1000) + +#define OPA_VNIC_UNSUP_ATTR \ + cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB) + +#define OPA_VNIC_INVAL_ATTR \ + cpu_to_be16(IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE) + +#define OPA_VNIC_CLASS_CAP_TRAP 0x1 + +/* Maximum number of VNIC ports supported */ +#define OPA_VNIC_MAX_NUM_VPORT 255 + +/** + * struct opa_vnic_vema_port -- VNIC VEMA port details + * @cport: pointer to port + * @mad_agent: pointer to mad agent for port + * @class_port_info: Class port info information. + * @tid: Transaction id + * @port_num: OPA port number + * @vports: vnic ports + * @event_handler: ib event handler + * @lock: adapter interface lock + */ +struct opa_vnic_vema_port { + struct opa_vnic_ctrl_port *cport; + struct ib_mad_agent *mad_agent; + struct opa_class_port_info class_port_info; + u64 tid; + u8 port_num; + struct xarray vports; + struct ib_event_handler event_handler; + + /* Lock to query/update network adapter */ + struct mutex lock; +}; + +static int opa_vnic_vema_add_one(struct ib_device *device); +static void opa_vnic_vema_rem_one(struct ib_device *device, + void *client_data); + +static struct ib_client opa_vnic_client = { + .name = opa_vnic_driver_name, + .add = opa_vnic_vema_add_one, + .remove = opa_vnic_vema_rem_one, +}; + +/** + * vema_get_vport_num -- Get the vnic from the mad + * @recvd_mad: Received mad + * + * Return: returns value of the vnic port number + */ +static inline u8 vema_get_vport_num(struct opa_vnic_vema_mad *recvd_mad) +{ + return be32_to_cpu(recvd_mad->mad_hdr.attr_mod) & 0xff; +} + +/** + * vema_get_vport_adapter -- Get vnic port adapter from recvd mad + * @recvd_mad: received mad + * @port: ptr to port struct on which MAD was recvd + * + * Return: vnic adapter + */ +static inline struct opa_vnic_adapter * +vema_get_vport_adapter(struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_port *port) +{ + u8 vport_num = vema_get_vport_num(recvd_mad); + + return xa_load(&port->vports, vport_num); +} + +/** + * vema_mac_tbl_req_ok -- Check if mac request has correct values + * @mac_tbl: mac table + * + * This function checks for the validity of the offset and number of + * entries required. + * + * Return: true if offset and num_entries are valid + */ +static inline bool vema_mac_tbl_req_ok(struct opa_veswport_mactable *mac_tbl) +{ + u16 offset, num_entries; + u16 req_entries = ((OPA_VNIC_EMA_DATA - sizeof(*mac_tbl)) / + sizeof(mac_tbl->tbl_entries[0])); + + offset = be16_to_cpu(mac_tbl->offset); + num_entries = be16_to_cpu(mac_tbl->num_entries); + + return ((num_entries <= req_entries) && + (offset + num_entries <= OPA_VNIC_MAC_TBL_MAX_ENTRIES)); +} + +/* + * Return the power on default values in the port info structure + * in big endian format as required by MAD. + */ +static inline void vema_get_pod_values(struct opa_veswport_info *port_info) +{ + memset(port_info, 0, sizeof(*port_info)); + port_info->vport.max_mac_tbl_ent = + cpu_to_be16(OPA_VNIC_MAC_TBL_MAX_ENTRIES); + port_info->vport.max_smac_ent = + cpu_to_be16(OPA_VNIC_MAX_SMAC_LIMIT); + port_info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL; + port_info->vport.config_state = OPA_VNIC_STATE_DROP_ALL; + port_info->vesw.eth_mtu = cpu_to_be16(ETH_DATA_LEN); +} + +/** + * vema_add_vport -- Add a new vnic port + * @port: ptr to opa_vnic_vema_port struct + * @vport_num: vnic port number (to be added) + * + * Return a pointer to the vnic adapter structure + */ +static struct opa_vnic_adapter *vema_add_vport(struct opa_vnic_vema_port *port, + u8 vport_num) +{ + struct opa_vnic_ctrl_port *cport = port->cport; + struct opa_vnic_adapter *adapter; + + adapter = opa_vnic_add_netdev(cport->ibdev, port->port_num, vport_num); + if (!IS_ERR(adapter)) { + int rc; + + adapter->cport = cport; + rc = xa_insert(&port->vports, vport_num, adapter, GFP_KERNEL); + if (rc < 0) { + opa_vnic_rem_netdev(adapter); + adapter = ERR_PTR(rc); + } + } + + return adapter; +} + +/** + * vema_get_class_port_info -- Get class info for port + * @port: Port on whic MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to respose mad + * + * This function copies the latest class port info value set for the + * port and stores it for generating traps + */ +static void vema_get_class_port_info(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_class_port_info *port_info; + + port_info = (struct opa_class_port_info *)rsp_mad->data; + memcpy(port_info, &port->class_port_info, sizeof(*port_info)); + port_info->base_version = OPA_MGMT_BASE_VERSION; + port_info->class_version = OPA_EMA_CLASS_VERSION; + + /* + * Set capability mask bit indicating agent generates traps, + * and set the maximum number of VNIC ports supported. + */ + port_info->cap_mask = cpu_to_be16((OPA_VNIC_CLASS_CAP_TRAP | + (OPA_VNIC_MAX_NUM_VPORT << 8))); + + /* + * Since a get routine is always sent by the EM first we + * set the expected response time to + * 4.096 usec * 2^18 == 1.0737 sec here. + */ + port_info->cap_mask2_resp_time = cpu_to_be32(18); +} + +/** + * vema_set_class_port_info -- Get class info for port + * @port: Port on whic MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to respose mad + * + * This function updates the port class info for the specific vnic + * and sets up the response mad data + */ +static void vema_set_class_port_info(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + memcpy(&port->class_port_info, recvd_mad->data, + sizeof(port->class_port_info)); + + vema_get_class_port_info(port, recvd_mad, rsp_mad); +} + +/** + * vema_get_veswport_info -- Get veswport info + * @port: source port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to respose mad + */ +static void vema_get_veswport_info(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_info *port_info = + (struct opa_veswport_info *)rsp_mad->data; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (adapter) { + memset(port_info, 0, sizeof(*port_info)); + opa_vnic_get_vesw_info(adapter, &port_info->vesw); + opa_vnic_get_per_veswport_info(adapter, + &port_info->vport); + } else { + vema_get_pod_values(port_info); + } +} + +/** + * vema_set_veswport_info -- Set veswport info + * @port: source port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to respose mad + * + * This function gets the port class infor for vnic + */ +static void vema_set_veswport_info(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_vnic_ctrl_port *cport = port->cport; + struct opa_veswport_info *port_info; + struct opa_vnic_adapter *adapter; + u8 vport_num; + + vport_num = vema_get_vport_num(recvd_mad); + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (!adapter) { + adapter = vema_add_vport(port, vport_num); + if (IS_ERR(adapter)) { + c_err("failed to add vport %d: %ld\n", + vport_num, PTR_ERR(adapter)); + goto err_exit; + } + } + + port_info = (struct opa_veswport_info *)recvd_mad->data; + opa_vnic_set_vesw_info(adapter, &port_info->vesw); + opa_vnic_set_per_veswport_info(adapter, &port_info->vport); + + /* Process the new config settings */ + opa_vnic_process_vema_config(adapter); + + vema_get_veswport_info(port, recvd_mad, rsp_mad); + return; + +err_exit: + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; +} + +/** + * vema_get_mac_entries -- Get MAC entries in VNIC MAC table + * @port: source port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to respose mad + * + * This function gets the MAC entries that are programmed into + * the VNIC MAC forwarding table. It checks for the validity of + * the index into the MAC table and the number of entries that + * are to be retrieved. + */ +static void vema_get_mac_entries(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_mactable *mac_tbl_in, *mac_tbl_out; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (!adapter) { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + return; + } + + mac_tbl_in = (struct opa_veswport_mactable *)recvd_mad->data; + mac_tbl_out = (struct opa_veswport_mactable *)rsp_mad->data; + + if (vema_mac_tbl_req_ok(mac_tbl_in)) { + mac_tbl_out->offset = mac_tbl_in->offset; + mac_tbl_out->num_entries = mac_tbl_in->num_entries; + opa_vnic_query_mac_tbl(adapter, mac_tbl_out); + } else { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + } +} + +/** + * vema_set_mac_entries -- Set MAC entries in VNIC MAC table + * @port: source port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to respose mad + * + * This function sets the MAC entries in the VNIC forwarding table + * It checks for the validity of the index and the number of forwarding + * table entries to be programmed. + */ +static void vema_set_mac_entries(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_mactable *mac_tbl; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (!adapter) { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + return; + } + + mac_tbl = (struct opa_veswport_mactable *)recvd_mad->data; + if (vema_mac_tbl_req_ok(mac_tbl)) { + if (opa_vnic_update_mac_tbl(adapter, mac_tbl)) + rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR; + } else { + rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR; + } + vema_get_mac_entries(port, recvd_mad, rsp_mad); +} + +/** + * vema_set_delete_vesw -- Reset VESW info to POD values + * @port: source port on which MAD was received + * @recvd_mad: pointer to the received mad + * @rsp_mad: pointer to respose mad + * + * This function clears all the fields of veswport info for the requested vesw + * and sets them back to the power-on default values. It does not delete the + * vesw. + */ +static void vema_set_delete_vesw(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_info *port_info = + (struct opa_veswport_info *)rsp_mad->data; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (!adapter) { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + return; + } + + vema_get_pod_values(port_info); + opa_vnic_set_vesw_info(adapter, &port_info->vesw); + opa_vnic_set_per_veswport_info(adapter, &port_info->vport); + + /* Process the new config settings */ + opa_vnic_process_vema_config(adapter); + + opa_vnic_release_mac_tbl(adapter); + + vema_get_veswport_info(port, recvd_mad, rsp_mad); +} + +/** + * vema_get_mac_list -- Get the unicast/multicast macs. + * @port: source port on which MAD was received + * @recvd_mad: Received mad contains fields to set vnic parameters + * @rsp_mad: Response mad to be built + * @attr_id: Attribute ID indicating multicast or unicast mac list + */ +static void vema_get_mac_list(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad, + u16 attr_id) +{ + struct opa_veswport_iface_macs *macs_in, *macs_out; + int max_entries = (OPA_VNIC_EMA_DATA - sizeof(*macs_out)) / ETH_ALEN; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (!adapter) { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + return; + } + + macs_in = (struct opa_veswport_iface_macs *)recvd_mad->data; + macs_out = (struct opa_veswport_iface_macs *)rsp_mad->data; + + macs_out->start_idx = macs_in->start_idx; + if (macs_in->num_macs_in_msg) + macs_out->num_macs_in_msg = macs_in->num_macs_in_msg; + else + macs_out->num_macs_in_msg = cpu_to_be16(max_entries); + + if (attr_id == OPA_EM_ATTR_IFACE_MCAST_MACS) + opa_vnic_query_mcast_macs(adapter, macs_out); + else + opa_vnic_query_ucast_macs(adapter, macs_out); +} + +/** + * vema_get_summary_counters -- Gets summary counters. + * @port: source port on which MAD was received + * @recvd_mad: Received mad contains fields to set vnic parameters + * @rsp_mad: Response mad to be built + */ +static void vema_get_summary_counters(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_summary_counters *cntrs; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (adapter) { + cntrs = (struct opa_veswport_summary_counters *)rsp_mad->data; + opa_vnic_get_summary_counters(adapter, cntrs); + } else { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + } +} + +/** + * vema_get_error_counters -- Gets summary counters. + * @port: source port on which MAD was received + * @recvd_mad: Received mad contains fields to set vnic parameters + * @rsp_mad: Response mad to be built + */ +static void vema_get_error_counters(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + struct opa_veswport_error_counters *cntrs; + struct opa_vnic_adapter *adapter; + + adapter = vema_get_vport_adapter(recvd_mad, port); + if (adapter) { + cntrs = (struct opa_veswport_error_counters *)rsp_mad->data; + opa_vnic_get_error_counters(adapter, cntrs); + } else { + rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR; + } +} + +/** + * vema_get -- Process received get MAD + * @port: source port on which MAD was received + * @recvd_mad: Received mad + * @rsp_mad: Response mad to be built + */ +static void vema_get(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id); + + switch (attr_id) { + case OPA_EM_ATTR_CLASS_PORT_INFO: + vema_get_class_port_info(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_VESWPORT_INFO: + vema_get_veswport_info(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES: + vema_get_mac_entries(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_IFACE_UCAST_MACS: + case OPA_EM_ATTR_IFACE_MCAST_MACS: + vema_get_mac_list(port, recvd_mad, rsp_mad, attr_id); + break; + case OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS: + vema_get_summary_counters(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS: + vema_get_error_counters(port, recvd_mad, rsp_mad); + break; + default: + rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR; + break; + } +} + +/** + * vema_set -- Process received set MAD + * @port: source port on which MAD was received + * @recvd_mad: Received mad contains fields to set vnic parameters + * @rsp_mad: Response mad to be built + */ +static void vema_set(struct opa_vnic_vema_port *port, + struct opa_vnic_vema_mad *recvd_mad, + struct opa_vnic_vema_mad *rsp_mad) +{ + u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id); + + switch (attr_id) { + case OPA_EM_ATTR_CLASS_PORT_INFO: + vema_set_class_port_info(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_VESWPORT_INFO: + vema_set_veswport_info(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES: + vema_set_mac_entries(port, recvd_mad, rsp_mad); + break; + case OPA_EM_ATTR_DELETE_VESW: + vema_set_delete_vesw(port, recvd_mad, rsp_mad); + break; + default: + rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR; + break; + } +} + +/** + * vema_send -- Send handler for VEMA MAD agent + * @mad_agent: pointer to the mad agent + * @mad_wc: pointer to mad send work completion information + * + * Free all the data structures associated with the sent MAD + */ +static void vema_send(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_wc) +{ + rdma_destroy_ah(mad_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE); + ib_free_send_mad(mad_wc->send_buf); +} + +/** + * vema_recv -- Recv handler for VEMA MAD agent + * @mad_agent: pointer to the mad agent + * @send_buf: Send buffer if found, else NULL + * @mad_wc: pointer to mad send work completion information + * + * Handle only set and get methods and respond to other methods + * as unsupported. Allocate response buffer and address handle + * for the response MAD. + */ +static void vema_recv(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, + struct ib_mad_recv_wc *mad_wc) +{ + struct opa_vnic_vema_port *port; + struct ib_ah *ah; + struct ib_mad_send_buf *rsp; + struct opa_vnic_vema_mad *vema_mad; + + if (!mad_wc || !mad_wc->recv_buf.mad) + return; + + port = mad_agent->context; + ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc, + mad_wc->recv_buf.grh, mad_agent->port_num); + if (IS_ERR(ah)) + goto free_recv_mad; + + rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp, + mad_wc->wc->pkey_index, 0, + IB_MGMT_VENDOR_HDR, OPA_VNIC_EMA_DATA, + GFP_KERNEL, OPA_MGMT_BASE_VERSION); + if (IS_ERR(rsp)) + goto err_rsp; + + rsp->ah = ah; + vema_mad = rsp->mad; + memcpy(vema_mad, mad_wc->recv_buf.mad, IB_MGMT_VENDOR_HDR); + vema_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + vema_mad->mad_hdr.status = 0; + + /* Lock ensures network adapter is not removed */ + mutex_lock(&port->lock); + + switch (mad_wc->recv_buf.mad->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + vema_get(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad, + vema_mad); + break; + case IB_MGMT_METHOD_SET: + vema_set(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad, + vema_mad); + break; + default: + vema_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR; + break; + } + mutex_unlock(&port->lock); + + if (!ib_post_send_mad(rsp, NULL)) { + /* + * with post send successful ah and send mad + * will be destroyed in send handler + */ + goto free_recv_mad; + } + + ib_free_send_mad(rsp); + +err_rsp: + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); +free_recv_mad: + ib_free_recv_mad(mad_wc); +} + +/** + * vema_get_port -- Gets the opa_vnic_vema_port + * @cport: pointer to control dev + * @port_num: Port number + * + * This function loops through the ports and returns + * the opa_vnic_vema port structure that is associated + * with the OPA port number + * + * Return: ptr to requested opa_vnic_vema_port strucure + * if success, NULL if not + */ +static struct opa_vnic_vema_port * +vema_get_port(struct opa_vnic_ctrl_port *cport, u8 port_num) +{ + struct opa_vnic_vema_port *port = (void *)cport + sizeof(*cport); + + if (port_num > cport->num_ports) + return NULL; + + return port + (port_num - 1); +} + +/** + * opa_vnic_vema_send_trap -- This function sends a trap to the EM + * @adapter: pointer to vnic adapter + * @data: pointer to trap data filled by calling function + * @lid: issuers lid (encap_slid from vesw_port_info) + * + * This function is called from the VNIC driver to send a trap if there + * is somethng the EM should be notified about. These events currently + * are + * 1) UNICAST INTERFACE MACADDRESS changes + * 2) MULTICAST INTERFACE MACADDRESS changes + * 3) ETHERNET LINK STATUS changes + * While allocating the send mad the remote site qpn used is 1 + * as this is the well known QP. + * + */ +void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter, + struct __opa_veswport_trap *data, u32 lid) +{ + struct opa_vnic_ctrl_port *cport = adapter->cport; + struct ib_mad_send_buf *send_buf; + struct opa_vnic_vema_port *port; + struct ib_device *ibp; + struct opa_vnic_vema_mad_trap *trap_mad; + struct opa_class_port_info *class; + struct rdma_ah_attr ah_attr; + struct ib_ah *ah; + struct opa_veswport_trap *trap; + u32 trap_lid; + u16 pkey_idx; + + if (!cport) + goto err_exit; + ibp = cport->ibdev; + port = vema_get_port(cport, data->opaportnum); + if (!port || !port->mad_agent) + goto err_exit; + + if (time_before(jiffies, adapter->trap_timeout)) { + if (adapter->trap_count == OPA_VNIC_TRAP_BURST_LIMIT) { + v_warn("Trap rate exceeded\n"); + goto err_exit; + } else { + adapter->trap_count++; + } + } else { + adapter->trap_count = 0; + } + + class = &port->class_port_info; + /* Set up address handle */ + memset(&ah_attr, 0, sizeof(ah_attr)); + ah_attr.type = rdma_ah_find_type(ibp, port->port_num); + rdma_ah_set_sl(&ah_attr, + GET_TRAP_SL_FROM_CLASS_PORT_INFO(class->trap_sl_rsvd)); + rdma_ah_set_port_num(&ah_attr, port->port_num); + trap_lid = be32_to_cpu(class->trap_lid); + /* + * check for trap lid validity, must not be zero + * The trap sink could change after we fashion the MAD but since traps + * are not guaranteed we won't use a lock as anyway the change will take + * place even with locking. + */ + if (!trap_lid) { + c_err("%s: Invalid dlid\n", __func__); + goto err_exit; + } + + rdma_ah_set_dlid(&ah_attr, trap_lid); + ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr, 0); + if (IS_ERR(ah)) { + c_err("%s:Couldn't create new AH = %p\n", __func__, ah); + c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__, + rdma_ah_get_dlid(&ah_attr), rdma_ah_get_sl(&ah_attr), + rdma_ah_get_port_num(&ah_attr)); + goto err_exit; + } + + if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_FULL, + &pkey_idx) < 0) { + c_err("%s:full key not found, defaulting to partial\n", + __func__); + if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_PARTIAL, + &pkey_idx) < 0) + pkey_idx = 1; + } + + send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0, + IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA, + GFP_ATOMIC, OPA_MGMT_BASE_VERSION); + if (IS_ERR(send_buf)) { + c_err("%s:Couldn't allocate send buf\n", __func__); + goto err_sndbuf; + } + + send_buf->ah = ah; + + /* Set up common MAD hdr */ + trap_mad = send_buf->mad; + trap_mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION; + trap_mad->mad_hdr.mgmt_class = OPA_MGMT_CLASS_INTEL_EMA; + trap_mad->mad_hdr.class_version = OPA_EMA_CLASS_VERSION; + trap_mad->mad_hdr.method = IB_MGMT_METHOD_TRAP; + port->tid++; + trap_mad->mad_hdr.tid = cpu_to_be64(port->tid); + trap_mad->mad_hdr.attr_id = IB_SMP_ATTR_NOTICE; + + /* Set up vendor OUI */ + trap_mad->oui[0] = INTEL_OUI_1; + trap_mad->oui[1] = INTEL_OUI_2; + trap_mad->oui[2] = INTEL_OUI_3; + + /* Setup notice attribute portion */ + trap_mad->notice.gen_type = OPA_INTEL_EMA_NOTICE_TYPE_INFO << 1; + trap_mad->notice.oui_1 = INTEL_OUI_1; + trap_mad->notice.oui_2 = INTEL_OUI_2; + trap_mad->notice.oui_3 = INTEL_OUI_3; + trap_mad->notice.issuer_lid = cpu_to_be32(lid); + + /* copy the actual trap data */ + trap = (struct opa_veswport_trap *)trap_mad->notice.raw_data; + trap->fabric_id = cpu_to_be16(data->fabric_id); + trap->veswid = cpu_to_be16(data->veswid); + trap->veswportnum = cpu_to_be32(data->veswportnum); + trap->opaportnum = cpu_to_be16(data->opaportnum); + trap->veswportindex = data->veswportindex; + trap->opcode = data->opcode; + + /* If successful send set up rate limit timeout else bail */ + if (ib_post_send_mad(send_buf, NULL)) { + ib_free_send_mad(send_buf); + } else { + if (adapter->trap_count) + return; + adapter->trap_timeout = jiffies + + usecs_to_jiffies(OPA_VNIC_TRAP_TIMEOUT); + return; + } + +err_sndbuf: + rdma_destroy_ah(ah, 0); +err_exit: + v_err("Aborting trap\n"); +} + +static void opa_vnic_event(struct ib_event_handler *handler, + struct ib_event *record) +{ + struct opa_vnic_vema_port *port = + container_of(handler, struct opa_vnic_vema_port, event_handler); + struct opa_vnic_ctrl_port *cport = port->cport; + struct opa_vnic_adapter *adapter; + unsigned long index; + + if (record->element.port_num != port->port_num) + return; + + c_dbg("OPA_VNIC received event %d on device %s port %d\n", + record->event, dev_name(&record->device->dev), + record->element.port_num); + + if (record->event != IB_EVENT_PORT_ERR && + record->event != IB_EVENT_PORT_ACTIVE) + return; + + xa_for_each(&port->vports, index, adapter) { + if (record->event == IB_EVENT_PORT_ACTIVE) + netif_carrier_on(adapter->netdev); + else + netif_carrier_off(adapter->netdev); + } +} + +/** + * vema_unregister -- Unregisters agent + * @cport: pointer to control port + * + * This deletes the registration by VEMA for MADs + */ +static void vema_unregister(struct opa_vnic_ctrl_port *cport) +{ + struct opa_vnic_adapter *adapter; + unsigned long index; + int i; + + for (i = 1; i <= cport->num_ports; i++) { + struct opa_vnic_vema_port *port = vema_get_port(cport, i); + + if (!port->mad_agent) + continue; + + /* Lock ensures no MAD is being processed */ + mutex_lock(&port->lock); + xa_for_each(&port->vports, index, adapter) + opa_vnic_rem_netdev(adapter); + mutex_unlock(&port->lock); + + ib_unregister_mad_agent(port->mad_agent); + port->mad_agent = NULL; + mutex_destroy(&port->lock); + xa_destroy(&port->vports); + ib_unregister_event_handler(&port->event_handler); + } +} + +/** + * vema_register -- Registers agent + * @cport: pointer to control port + * + * This function registers the handlers for the VEMA MADs + * + * Return: returns 0 on success. non zero otherwise + */ +static int vema_register(struct opa_vnic_ctrl_port *cport) +{ + struct ib_mad_reg_req reg_req = { + .mgmt_class = OPA_MGMT_CLASS_INTEL_EMA, + .mgmt_class_version = OPA_MGMT_BASE_VERSION, + .oui = { INTEL_OUI_1, INTEL_OUI_2, INTEL_OUI_3 } + }; + int i; + + set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask); + set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask); + + /* register ib event handler and mad agent for each port on dev */ + for (i = 1; i <= cport->num_ports; i++) { + struct opa_vnic_vema_port *port = vema_get_port(cport, i); + int ret; + + port->cport = cport; + port->port_num = i; + + INIT_IB_EVENT_HANDLER(&port->event_handler, + cport->ibdev, opa_vnic_event); + ib_register_event_handler(&port->event_handler); + + xa_init(&port->vports); + mutex_init(&port->lock); + port->mad_agent = ib_register_mad_agent(cport->ibdev, i, + IB_QPT_GSI, ®_req, + IB_MGMT_RMPP_VERSION, + vema_send, vema_recv, + port, 0); + if (IS_ERR(port->mad_agent)) { + ret = PTR_ERR(port->mad_agent); + port->mad_agent = NULL; + mutex_destroy(&port->lock); + vema_unregister(cport); + return ret; + } + } + + return 0; +} + +/** + * opa_vnic_ctrl_config_dev -- This function sends a trap to the EM + * by way of ib_modify_port to indicate support for ethernet on the + * fabric. + * @cport: pointer to control port + * @en: enable or disable ethernet on fabric support + */ +static void opa_vnic_ctrl_config_dev(struct opa_vnic_ctrl_port *cport, bool en) +{ + struct ib_port_modify pm = { 0 }; + int i; + + if (en) + pm.set_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported; + else + pm.clr_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported; + + for (i = 1; i <= cport->num_ports; i++) + ib_modify_port(cport->ibdev, i, IB_PORT_OPA_MASK_CHG, &pm); +} + +/** + * opa_vnic_vema_add_one -- Handle new ib device + * @device: ib device pointer + * + * Allocate the vnic control port and initialize it. + */ +static int opa_vnic_vema_add_one(struct ib_device *device) +{ + struct opa_vnic_ctrl_port *cport; + int rc, size = sizeof(*cport); + + if (!rdma_cap_opa_vnic(device)) + return -EOPNOTSUPP; + + size += device->phys_port_cnt * sizeof(struct opa_vnic_vema_port); + cport = kzalloc(size, GFP_KERNEL); + if (!cport) + return -ENOMEM; + + cport->num_ports = device->phys_port_cnt; + cport->ibdev = device; + + /* Initialize opa vnic management agent (vema) */ + rc = vema_register(cport); + if (!rc) + c_info("VNIC client initialized\n"); + + ib_set_client_data(device, &opa_vnic_client, cport); + opa_vnic_ctrl_config_dev(cport, true); + return 0; +} + +/** + * opa_vnic_vema_rem_one -- Handle ib device removal + * @device: ib device pointer + * @client_data: ib client data + * + * Uninitialize and free the vnic control port. + */ +static void opa_vnic_vema_rem_one(struct ib_device *device, + void *client_data) +{ + struct opa_vnic_ctrl_port *cport = client_data; + + c_info("removing VNIC client\n"); + opa_vnic_ctrl_config_dev(cport, false); + vema_unregister(cport); + kfree(cport); +} + +static int __init opa_vnic_init(void) +{ + int rc; + + rc = ib_register_client(&opa_vnic_client); + if (rc) + pr_err("VNIC driver register failed %d\n", rc); + + return rc; +} +module_init(opa_vnic_init); + +static void opa_vnic_deinit(void) +{ + ib_unregister_client(&opa_vnic_client); +} +module_exit(opa_vnic_deinit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Cornelis Networks"); +MODULE_DESCRIPTION("Cornelis OPX Virtual Network driver"); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c new file mode 100644 index 000000000..292c037aa --- /dev/null +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c @@ -0,0 +1,390 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains OPA VNIC EMA Interface functions. + */ + +#include "opa_vnic_internal.h" + +/** + * opa_vnic_vema_report_event - sent trap to report the specified event + * @adapter: vnic port adapter + * @event: event to be reported + * + * This function calls vema api to sent a trap for the given event. + */ +void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event) +{ + struct __opa_veswport_info *info = &adapter->info; + struct __opa_veswport_trap trap_data; + + trap_data.fabric_id = info->vesw.fabric_id; + trap_data.veswid = info->vesw.vesw_id; + trap_data.veswportnum = info->vport.port_num; + trap_data.opaportnum = adapter->port_num; + trap_data.veswportindex = adapter->vport_num; + trap_data.opcode = event; + + opa_vnic_vema_send_trap(adapter, &trap_data, info->vport.encap_slid); +} + +/** + * opa_vnic_get_summary_counters - get summary counters + * @adapter: vnic port adapter + * @cntrs: pointer to destination summary counters structure + * + * This function populates the summary counters that is maintained by the + * given adapter to destination address provided. + */ +void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter, + struct opa_veswport_summary_counters *cntrs) +{ + struct opa_vnic_stats vstats; + __be64 *dst; + u64 *src; + + memset(&vstats, 0, sizeof(vstats)); + spin_lock(&adapter->stats_lock); + adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats); + spin_unlock(&adapter->stats_lock); + + cntrs->vp_instance = cpu_to_be16(adapter->vport_num); + cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id); + cntrs->veswport_num = cpu_to_be32(adapter->port_num); + + cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors); + cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors); + cntrs->tx_packets = cpu_to_be64(vstats.netstats.tx_packets); + cntrs->rx_packets = cpu_to_be64(vstats.netstats.rx_packets); + cntrs->tx_bytes = cpu_to_be64(vstats.netstats.tx_bytes); + cntrs->rx_bytes = cpu_to_be64(vstats.netstats.rx_bytes); + + /* + * This loop depends on layout of + * opa_veswport_summary_counters opa_vnic_stats structures. + */ + for (dst = &cntrs->tx_unicast, src = &vstats.tx_grp.unicast; + dst < &cntrs->reserved[0]; dst++, src++) { + *dst = cpu_to_be64(*src); + } +} + +/** + * opa_vnic_get_error_counters - get error counters + * @adapter: vnic port adapter + * @cntrs: pointer to destination error counters structure + * + * This function populates the error counters that is maintained by the + * given adapter to destination address provided. + */ +void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter, + struct opa_veswport_error_counters *cntrs) +{ + struct opa_vnic_stats vstats; + + memset(&vstats, 0, sizeof(vstats)); + spin_lock(&adapter->stats_lock); + adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats); + spin_unlock(&adapter->stats_lock); + + cntrs->vp_instance = cpu_to_be16(adapter->vport_num); + cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id); + cntrs->veswport_num = cpu_to_be32(adapter->port_num); + + cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors); + cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors); + cntrs->tx_dlid_zero = cpu_to_be64(vstats.tx_dlid_zero); + cntrs->tx_drop_state = cpu_to_be64(vstats.tx_drop_state); + cntrs->tx_logic = cpu_to_be64(vstats.netstats.tx_fifo_errors + + vstats.netstats.tx_carrier_errors); + + cntrs->rx_bad_veswid = cpu_to_be64(vstats.netstats.rx_nohandler); + cntrs->rx_runt = cpu_to_be64(vstats.rx_runt); + cntrs->rx_oversize = cpu_to_be64(vstats.rx_oversize); + cntrs->rx_drop_state = cpu_to_be64(vstats.rx_drop_state); + cntrs->rx_logic = cpu_to_be64(vstats.netstats.rx_fifo_errors); +} + +/** + * opa_vnic_get_vesw_info -- Get the vesw information + * @adapter: vnic port adapter + * @info: pointer to destination vesw info structure + * + * This function copies the vesw info that is maintained by the + * given adapter to destination address provided. + */ +void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter, + struct opa_vesw_info *info) +{ + struct __opa_vesw_info *src = &adapter->info.vesw; + int i; + + info->fabric_id = cpu_to_be16(src->fabric_id); + info->vesw_id = cpu_to_be16(src->vesw_id); + memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0)); + info->def_port_mask = cpu_to_be16(src->def_port_mask); + memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1)); + info->pkey = cpu_to_be16(src->pkey); + + memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2)); + info->u_mcast_dlid = cpu_to_be32(src->u_mcast_dlid); + for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) + info->u_ucast_dlid[i] = cpu_to_be32(src->u_ucast_dlid[i]); + + info->rc = cpu_to_be32(src->rc); + + memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3)); + info->eth_mtu = cpu_to_be16(src->eth_mtu); + memcpy(info->rsvd4, src->rsvd4, ARRAY_SIZE(src->rsvd4)); +} + +/** + * opa_vnic_set_vesw_info -- Set the vesw information + * @adapter: vnic port adapter + * @info: pointer to vesw info structure + * + * This function updates the vesw info that is maintained by the + * given adapter with vesw info provided. Reserved fields are stored + * and returned back to EM as is. + */ +void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter, + struct opa_vesw_info *info) +{ + struct __opa_vesw_info *dst = &adapter->info.vesw; + int i; + + dst->fabric_id = be16_to_cpu(info->fabric_id); + dst->vesw_id = be16_to_cpu(info->vesw_id); + memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0)); + dst->def_port_mask = be16_to_cpu(info->def_port_mask); + memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1)); + dst->pkey = be16_to_cpu(info->pkey); + + memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2)); + dst->u_mcast_dlid = be32_to_cpu(info->u_mcast_dlid); + for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) + dst->u_ucast_dlid[i] = be32_to_cpu(info->u_ucast_dlid[i]); + + dst->rc = be32_to_cpu(info->rc); + + memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3)); + dst->eth_mtu = be16_to_cpu(info->eth_mtu); + memcpy(dst->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4)); +} + +/** + * opa_vnic_get_per_veswport_info -- Get the vesw per port information + * @adapter: vnic port adapter + * @info: pointer to destination vport info structure + * + * This function copies the vesw per port info that is maintained by the + * given adapter to destination address provided. + * Note that the read only fields are not copied. + */ +void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter, + struct opa_per_veswport_info *info) +{ + struct __opa_per_veswport_info *src = &adapter->info.vport; + + info->port_num = cpu_to_be32(src->port_num); + info->eth_link_status = src->eth_link_status; + memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0)); + + memcpy(info->base_mac_addr, src->base_mac_addr, + ARRAY_SIZE(info->base_mac_addr)); + info->config_state = src->config_state; + info->oper_state = src->oper_state; + info->max_mac_tbl_ent = cpu_to_be16(src->max_mac_tbl_ent); + info->max_smac_ent = cpu_to_be16(src->max_smac_ent); + info->mac_tbl_digest = cpu_to_be32(src->mac_tbl_digest); + memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1)); + + info->encap_slid = cpu_to_be32(src->encap_slid); + memcpy(info->pcp_to_sc_uc, src->pcp_to_sc_uc, + ARRAY_SIZE(info->pcp_to_sc_uc)); + memcpy(info->pcp_to_vl_uc, src->pcp_to_vl_uc, + ARRAY_SIZE(info->pcp_to_vl_uc)); + memcpy(info->pcp_to_sc_mc, src->pcp_to_sc_mc, + ARRAY_SIZE(info->pcp_to_sc_mc)); + memcpy(info->pcp_to_vl_mc, src->pcp_to_vl_mc, + ARRAY_SIZE(info->pcp_to_vl_mc)); + info->non_vlan_sc_uc = src->non_vlan_sc_uc; + info->non_vlan_vl_uc = src->non_vlan_vl_uc; + info->non_vlan_sc_mc = src->non_vlan_sc_mc; + info->non_vlan_vl_mc = src->non_vlan_vl_mc; + memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2)); + + info->uc_macs_gen_count = cpu_to_be16(src->uc_macs_gen_count); + info->mc_macs_gen_count = cpu_to_be16(src->mc_macs_gen_count); + memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3)); +} + +/** + * opa_vnic_set_per_veswport_info -- Set vesw per port information + * @adapter: vnic port adapter + * @info: pointer to vport info structure + * + * This function updates the vesw per port info that is maintained by the + * given adapter with vesw per port info provided. Reserved fields are + * stored and returned back to EM as is. + */ +void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter, + struct opa_per_veswport_info *info) +{ + struct __opa_per_veswport_info *dst = &adapter->info.vport; + + dst->port_num = be32_to_cpu(info->port_num); + memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0)); + + memcpy(dst->base_mac_addr, info->base_mac_addr, + ARRAY_SIZE(dst->base_mac_addr)); + dst->config_state = info->config_state; + memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1)); + + dst->encap_slid = be32_to_cpu(info->encap_slid); + memcpy(dst->pcp_to_sc_uc, info->pcp_to_sc_uc, + ARRAY_SIZE(dst->pcp_to_sc_uc)); + memcpy(dst->pcp_to_vl_uc, info->pcp_to_vl_uc, + ARRAY_SIZE(dst->pcp_to_vl_uc)); + memcpy(dst->pcp_to_sc_mc, info->pcp_to_sc_mc, + ARRAY_SIZE(dst->pcp_to_sc_mc)); + memcpy(dst->pcp_to_vl_mc, info->pcp_to_vl_mc, + ARRAY_SIZE(dst->pcp_to_vl_mc)); + dst->non_vlan_sc_uc = info->non_vlan_sc_uc; + dst->non_vlan_vl_uc = info->non_vlan_vl_uc; + dst->non_vlan_sc_mc = info->non_vlan_sc_mc; + dst->non_vlan_vl_mc = info->non_vlan_vl_mc; + memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2)); + memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3)); +} + +/** + * opa_vnic_query_mcast_macs - query multicast mac list + * @adapter: vnic port adapter + * @macs: pointer mac list + * + * This function populates the provided mac list with the configured + * multicast addresses in the adapter. + */ +void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter, + struct opa_veswport_iface_macs *macs) +{ + u16 start_idx, num_macs, idx = 0, count = 0; + struct netdev_hw_addr *ha; + + start_idx = be16_to_cpu(macs->start_idx); + num_macs = be16_to_cpu(macs->num_macs_in_msg); + netdev_for_each_mc_addr(ha, adapter->netdev) { + struct opa_vnic_iface_mac_entry *entry = &macs->entry[count]; + + if (start_idx > idx++) + continue; + else if (num_macs == count) + break; + memcpy(entry, ha->addr, sizeof(*entry)); + count++; + } + + macs->tot_macs_in_lst = cpu_to_be16(netdev_mc_count(adapter->netdev)); + macs->num_macs_in_msg = cpu_to_be16(count); + macs->gen_count = cpu_to_be16(adapter->info.vport.mc_macs_gen_count); +} + +/** + * opa_vnic_query_ucast_macs - query unicast mac list + * @adapter: vnic port adapter + * @macs: pointer mac list + * + * This function populates the provided mac list with the configured + * unicast addresses in the adapter. + */ +void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, + struct opa_veswport_iface_macs *macs) +{ + u16 start_idx, tot_macs, num_macs, idx = 0, count = 0, em_macs = 0; + struct netdev_hw_addr *ha; + + start_idx = be16_to_cpu(macs->start_idx); + num_macs = be16_to_cpu(macs->num_macs_in_msg); + /* loop through dev_addrs list first */ + for_each_dev_addr(adapter->netdev, ha) { + struct opa_vnic_iface_mac_entry *entry = &macs->entry[count]; + + /* Do not include EM specified MAC address */ + if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr, + ARRAY_SIZE(adapter->info.vport.base_mac_addr))) { + em_macs++; + continue; + } + + if (start_idx > idx++) + continue; + else if (num_macs == count) + break; + memcpy(entry, ha->addr, sizeof(*entry)); + count++; + } + + /* loop through uc list */ + netdev_for_each_uc_addr(ha, adapter->netdev) { + struct opa_vnic_iface_mac_entry *entry = &macs->entry[count]; + + if (start_idx > idx++) + continue; + else if (num_macs == count) + break; + memcpy(entry, ha->addr, sizeof(*entry)); + count++; + } + + tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) + + netdev_uc_count(adapter->netdev) - em_macs; + macs->tot_macs_in_lst = cpu_to_be16(tot_macs); + macs->num_macs_in_msg = cpu_to_be16(count); + macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count); +} |