diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/net/ethernet/ibm | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/net/ethernet/ibm')
31 files changed, 23899 insertions, 0 deletions
diff --git a/drivers/net/ethernet/ibm/Kconfig b/drivers/net/ethernet/ibm/Kconfig new file mode 100644 index 000000000..c0c112d95 --- /dev/null +++ b/drivers/net/ethernet/ibm/Kconfig @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# IBM device configuration. +# + +config NET_VENDOR_IBM + bool "IBM devices" + default y + depends on PPC_PSERIES || PPC_DCR || (IBMEBUS && SPARSEMEM) + help + If you have a network (Ethernet) card belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about IBM devices. If you say Y, you will be asked for + your specific card in the following questions. + +if NET_VENDOR_IBM + +config IBMVETH + tristate "IBM LAN Virtual Ethernet support" + depends on PPC_PSERIES + help + This driver supports virtual ethernet adapters on newer IBM iSeries + and pSeries systems. + + To compile this driver as a module, choose M here. The module will + be called ibmveth. + +source "drivers/net/ethernet/ibm/emac/Kconfig" + +config EHEA + tristate "eHEA Ethernet support" + depends on IBMEBUS && SPARSEMEM + help + This driver supports the IBM pSeries eHEA ethernet adapter. + + To compile the driver as a module, choose M here. The module + will be called ehea. + +config IBMVNIC + tristate "IBM Virtual NIC support" + depends on PPC_PSERIES + help + This driver supports Virtual NIC adapters on IBM i and IBM System p + systems. + + To compile this driver as a module, choose M here. The module will + be called ibmvnic. + +endif # NET_VENDOR_IBM diff --git a/drivers/net/ethernet/ibm/Makefile b/drivers/net/ethernet/ibm/Makefile new file mode 100644 index 000000000..1d17d0c33 --- /dev/null +++ b/drivers/net/ethernet/ibm/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for th IBM network device drivers. 
+# + +obj-$(CONFIG_IBMVETH) += ibmveth.o +obj-$(CONFIG_IBMVNIC) += ibmvnic.o +obj-$(CONFIG_IBM_EMAC) += emac/ +obj-$(CONFIG_EHEA) += ehea/ diff --git a/drivers/net/ethernet/ibm/ehea/Makefile b/drivers/net/ethernet/ibm/ehea/Makefile new file mode 100644 index 000000000..9e1e5c7aa --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the eHEA ethernet device driver for IBM eServer System p +# +ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o +obj-$(CONFIG_EHEA) += ehea.o + diff --git a/drivers/net/ethernet/ibm/ehea/ehea.h b/drivers/net/ethernet/ibm/ehea/ehea.h new file mode 100644 index 000000000..208c440a6 --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea.h @@ -0,0 +1,477 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *  linux/drivers/net/ethernet/ibm/ehea/ehea.h
 *
 *  eHEA ethernet device driver for IBM eServer System p
 *
 *  (C) Copyright IBM Corp. 2006
 *
 *  Authors:
 *       Christoph Raisch <raisch@de.ibm.com>
 *       Jan-Bernd Themann <themann@de.ibm.com>
 *       Thomas Klein <tklein@de.ibm.com>
 */

#ifndef __EHEA_H__
#define __EHEA_H__

#include <linux/module.h>
#include <linux/ethtool.h>
#include <linux/vmalloc.h>
#include <linux/if_vlan.h>
#include <linux/platform_device.h>

#include <asm/ibmebus.h>
#include <asm/io.h>

#define DRV_NAME	"ehea"
#define DRV_VERSION	"EHEA_0107"

/* eHEA capability flags (bit values, OR-ed together in EHEA_CAPABILITIES) */
#define DLPAR_PORT_ADD_REM 1
#define DLPAR_MEM_ADD      2
#define DLPAR_MEM_REM      4
#define EHEA_CAPABILITIES  (DLPAR_PORT_ADD_REM | DLPAR_MEM_ADD | DLPAR_MEM_REM)

/* Default set of NETIF_MSG_* classes */
#define EHEA_MSG_DEFAULT (NETIF_MSG_LINK | NETIF_MSG_TIMER \
	| NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)

/* Upper/lower bounds for the number of queue entries */
#define EHEA_MAX_ENTRIES_RQ1 32767
#define EHEA_MAX_ENTRIES_RQ2 16383
#define EHEA_MAX_ENTRIES_RQ3 16383
#define EHEA_MAX_ENTRIES_SQ  32767
#define EHEA_MIN_ENTRIES_QP  127

/* Selects the smaller set of default queue sizes below */
#define EHEA_SMALL_QUEUES

#ifdef EHEA_SMALL_QUEUES
#define EHEA_MAX_CQE_COUNT      1023
#define EHEA_DEF_ENTRIES_SQ     1023
#define EHEA_DEF_ENTRIES_RQ1    1023
#define EHEA_DEF_ENTRIES_RQ2    1023
#define EHEA_DEF_ENTRIES_RQ3    511
#else
#define EHEA_MAX_CQE_COUNT      4080
#define EHEA_DEF_ENTRIES_SQ     4080
#define EHEA_DEF_ENTRIES_RQ1    8160
#define EHEA_DEF_ENTRIES_RQ2    2040
#define EHEA_DEF_ENTRIES_RQ3    2040
#endif

#define EHEA_MAX_ENTRIES_EQ 20

#define EHEA_SG_SQ  2
#define EHEA_SG_RQ1 1
#define EHEA_SG_RQ2 0
#define EHEA_SG_RQ3 0

#define EHEA_MAX_PACKET_SIZE    9022	/* for jumbo frames */
#define EHEA_RQ2_PKT_SIZE       2048
#define EHEA_L_PKT_SIZE         256	/* low latency */

/* Send completion signaling */

/* Protection Domain Identifier */
#define EHEA_PD_ID        0xaabcdeff

#define EHEA_RQ2_THRESHOLD	   1
#define EHEA_RQ3_THRESHOLD	   4	/* use RQ3 threshold of 2048 bytes */

/* Port speeds in Mbps; 0 requests autonegotiation */
#define EHEA_SPEED_10G         10000
#define EHEA_SPEED_1G           1000
#define EHEA_SPEED_100M          100
#define EHEA_SPEED_10M            10
#define EHEA_SPEED_AUTONEG         0

/* Broadcast/Multicast registration types */
#define EHEA_BCMC_SCOPE_ALL	0x08
#define EHEA_BCMC_SCOPE_SINGLE	0x00
#define EHEA_BCMC_MULTICAST	0x04
#define EHEA_BCMC_BROADCAST	0x00
#define EHEA_BCMC_UNTAGGED	0x02
#define EHEA_BCMC_TAGGED	0x00
#define EHEA_BCMC_VLANID_ALL	0x01
#define EHEA_BCMC_VLANID_SINGLE	0x00

#define EHEA_CACHE_LINE          128

/* Memory Regions */
#define EHEA_MR_ACC_CTRL       0x00800000

#define EHEA_BUSMAP_START      0x8000000000000000ULL
#define EHEA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL
#define EHEA_DIR_INDEX_SHIFT 13                   /* 8k Entries in 64k block */
#define EHEA_TOP_INDEX_SHIFT (EHEA_DIR_INDEX_SHIFT * 2)
#define EHEA_MAP_ENTRIES (1 << EHEA_DIR_INDEX_SHIFT)
#define EHEA_MAP_SIZE (0x10000)                   /* currently fixed map size */
#define EHEA_INDEX_MASK (EHEA_MAP_ENTRIES - 1)


/*
 * Parenthesized so the expansion stays a single operand wherever it is
 * used (e.g. "x / EHEA_WATCH_DOG_TIMEOUT" or "EHEA_WATCH_DOG_TIMEOUT / 2").
 */
#define EHEA_WATCH_DOG_TIMEOUT (10 * HZ)

/* utility functions */

void ehea_dump(void *adr, int len, char *msg);

/*
 * Bit-field descriptor: shift position in bits 16 and up, field length in
 * the low 16 bits.
 */
#define EHEA_BMASK(pos, length) (((pos) << 16) + (length))

/*
 * Same descriptor, built from an inclusive bit range [from, to] in which
 * bit 63 is the least significant bit: the shift is 63 - to and the length
 * is to - from + 1.  Every use of an argument is parenthesized so that
 * expression arguments expand correctly.
 */
#define EHEA_BMASK_IBM(from, to) (((63 - (to)) << 16) + ((to) - (from) + 1))

+#define EHEA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) + +#define EHEA_BMASK_MASK(mask) \ + (0xffffffffffffffffULL >> ((64 - (mask)) & 0xffff)) + +#define EHEA_BMASK_SET(mask, value) \ + ((EHEA_BMASK_MASK(mask) & ((u64)(value))) << EHEA_BMASK_SHIFTPOS(mask)) + +#define EHEA_BMASK_GET(mask, value) \ + (EHEA_BMASK_MASK(mask) & (((u64)(value)) >> EHEA_BMASK_SHIFTPOS(mask))) + +/* + * Generic ehea page + */ +struct ehea_page { + u8 entries[PAGE_SIZE]; +}; + +/* + * Generic queue in linux kernel virtual memory + */ +struct hw_queue { + u64 current_q_offset; /* current queue entry */ + struct ehea_page **queue_pages; /* array of pages belonging to queue */ + u32 qe_size; /* queue entry size */ + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; /* toggle flag - per page */ + u32 reserved; /* 64 bit alignment */ +}; + +/* + * For pSeries this is a 64bit memory address where + * I/O memory is mapped into CPU address space + */ +struct h_epa { + void __iomem *addr; +}; + +struct h_epa_user { + u64 addr; +}; + +struct h_epas { + struct h_epa kernel; /* kernel space accessible resource, + set to 0 if unused */ + struct h_epa_user user; /* user space accessible resource + set to 0 if unused */ +}; + +/* + * Memory map data structures + */ +struct ehea_dir_bmap +{ + u64 ent[EHEA_MAP_ENTRIES]; +}; +struct ehea_top_bmap +{ + struct ehea_dir_bmap *dir[EHEA_MAP_ENTRIES]; +}; +struct ehea_bmap +{ + struct ehea_top_bmap *top[EHEA_MAP_ENTRIES]; +}; + +struct ehea_qp; +struct ehea_cq; +struct ehea_eq; +struct ehea_port; +struct ehea_av; + +/* + * Queue attributes passed to ehea_create_qp() + */ +struct ehea_qp_init_attr { + /* input parameter */ + u32 qp_token; /* queue token */ + u8 low_lat_rq1; + u8 signalingtype; /* cqe generation flag */ + u8 rq_count; /* num of receive queues */ + u8 eqe_gen; /* eqe generation flag */ + u16 max_nr_send_wqes; /* max number of send wqes */ + u16 max_nr_rwqes_rq1; /* max number of receive wqes */ + u16 
max_nr_rwqes_rq2; + u16 max_nr_rwqes_rq3; + u8 wqe_size_enc_sq; + u8 wqe_size_enc_rq1; + u8 wqe_size_enc_rq2; + u8 wqe_size_enc_rq3; + u8 swqe_imm_data_len; /* immediate data length for swqes */ + u16 port_nr; + u16 rq2_threshold; + u16 rq3_threshold; + u64 send_cq_handle; + u64 recv_cq_handle; + u64 aff_eq_handle; + + /* output parameter */ + u32 qp_nr; + u16 act_nr_send_wqes; + u16 act_nr_rwqes_rq1; + u16 act_nr_rwqes_rq2; + u16 act_nr_rwqes_rq3; + u8 act_wqe_size_enc_sq; + u8 act_wqe_size_enc_rq1; + u8 act_wqe_size_enc_rq2; + u8 act_wqe_size_enc_rq3; + u32 nr_sq_pages; + u32 nr_rq1_pages; + u32 nr_rq2_pages; + u32 nr_rq3_pages; + u32 liobn_sq; + u32 liobn_rq1; + u32 liobn_rq2; + u32 liobn_rq3; +}; + +/* + * Event Queue attributes, passed as parameter + */ +struct ehea_eq_attr { + u32 type; + u32 max_nr_of_eqes; + u8 eqe_gen; /* generate eqe flag */ + u64 eq_handle; + u32 act_nr_of_eqes; + u32 nr_pages; + u32 ist1; /* Interrupt service token */ + u32 ist2; + u32 ist3; + u32 ist4; +}; + + +/* + * Event Queue + */ +struct ehea_eq { + struct ehea_adapter *adapter; + struct hw_queue hw_queue; + u64 fw_handle; + struct h_epas epas; + spinlock_t spinlock; + struct ehea_eq_attr attr; +}; + +/* + * HEA Queues + */ +struct ehea_qp { + struct ehea_adapter *adapter; + u64 fw_handle; /* QP handle for firmware calls */ + struct hw_queue hw_squeue; + struct hw_queue hw_rqueue1; + struct hw_queue hw_rqueue2; + struct hw_queue hw_rqueue3; + struct h_epas epas; + struct ehea_qp_init_attr init_attr; +}; + +/* + * Completion Queue attributes + */ +struct ehea_cq_attr { + /* input parameter */ + u32 max_nr_of_cqes; + u32 cq_token; + u64 eq_handle; + + /* output parameter */ + u32 act_nr_of_cqes; + u32 nr_pages; +}; + +/* + * Completion Queue + */ +struct ehea_cq { + struct ehea_adapter *adapter; + u64 fw_handle; + struct hw_queue hw_queue; + struct h_epas epas; + struct ehea_cq_attr attr; +}; + +/* + * Memory Region + */ +struct ehea_mr { + struct ehea_adapter *adapter; + u64 
handle; + u64 vaddr; + u32 lkey; +}; + +/* + * Port state information + */ +struct port_stats { + int poll_receive_errors; + int queue_stopped; + int err_tcp_cksum; + int err_ip_cksum; + int err_frame_crc; +}; + +#define EHEA_IRQ_NAME_SIZE 20 + +/* + * Queue SKB Array + */ +struct ehea_q_skb_arr { + struct sk_buff **arr; /* skb array for queue */ + int len; /* array length */ + int index; /* array index */ + int os_skbs; /* rq2/rq3 only: outstanding skbs */ +}; + +/* + * Port resources + */ +struct ehea_port_res { + struct napi_struct napi; + struct port_stats p_stats; + struct ehea_mr send_mr; /* send memory region */ + struct ehea_mr recv_mr; /* receive memory region */ + struct ehea_port *port; + char int_recv_name[EHEA_IRQ_NAME_SIZE]; + char int_send_name[EHEA_IRQ_NAME_SIZE]; + struct ehea_qp *qp; + struct ehea_cq *send_cq; + struct ehea_cq *recv_cq; + struct ehea_eq *eq; + struct ehea_q_skb_arr rq1_skba; + struct ehea_q_skb_arr rq2_skba; + struct ehea_q_skb_arr rq3_skba; + struct ehea_q_skb_arr sq_skba; + int sq_skba_size; + int swqe_refill_th; + atomic_t swqe_avail; + int swqe_ll_count; + u32 swqe_id_counter; + u64 tx_packets; + u64 tx_bytes; + u64 rx_packets; + u64 rx_bytes; + int sq_restart_flag; +}; + + +#define EHEA_MAX_PORTS 16 + +#define EHEA_NUM_PORTRES_FW_HANDLES 6 /* QP handle, SendCQ handle, + RecvCQ handle, EQ handle, + SendMR handle, RecvMR handle */ +#define EHEA_NUM_PORT_FW_HANDLES 1 /* EQ handle */ +#define EHEA_NUM_ADAPTER_FW_HANDLES 2 /* MR handle, NEQ handle */ + +struct ehea_adapter { + u64 handle; + struct platform_device *ofdev; + struct ehea_port *port[EHEA_MAX_PORTS]; + struct ehea_eq *neq; /* notification event queue */ + struct tasklet_struct neq_tasklet; + struct ehea_mr mr; + u32 pd; /* protection domain */ + u64 max_mc_mac; /* max number of multicast mac addresses */ + int active_ports; + struct list_head list; +}; + + +struct ehea_mc_list { + struct list_head list; + u64 macaddr; +}; + +/* kdump support */ +struct 
ehea_fw_handle_entry { + u64 adh; /* Adapter Handle */ + u64 fwh; /* Firmware Handle */ +}; + +struct ehea_fw_handle_array { + struct ehea_fw_handle_entry *arr; + int num_entries; + struct mutex lock; +}; + +struct ehea_bcmc_reg_entry { + u64 adh; /* Adapter Handle */ + u32 port_id; /* Logical Port Id */ + u8 reg_type; /* Registration Type */ + u64 macaddr; +}; + +struct ehea_bcmc_reg_array { + struct ehea_bcmc_reg_entry *arr; + int num_entries; + spinlock_t lock; +}; + +#define EHEA_PORT_UP 1 +#define EHEA_PORT_DOWN 0 +#define EHEA_PHY_LINK_UP 1 +#define EHEA_PHY_LINK_DOWN 0 +#define EHEA_MAX_PORT_RES 16 +struct ehea_port { + struct ehea_adapter *adapter; /* adapter that owns this port */ + struct net_device *netdev; + struct rtnl_link_stats64 stats; + struct ehea_port_res port_res[EHEA_MAX_PORT_RES]; + struct platform_device ofdev; /* Open Firmware Device */ + struct ehea_mc_list *mc_list; /* Multicast MAC addresses */ + struct ehea_eq *qp_eq; + struct work_struct reset_task; + struct delayed_work stats_work; + struct mutex port_lock; + char int_aff_name[EHEA_IRQ_NAME_SIZE]; + int allmulti; /* Indicates IFF_ALLMULTI state */ + int promisc; /* Indicates IFF_PROMISC state */ + int num_mcs; + int resets; + unsigned long flags; + u64 mac_addr; + u32 logical_port_id; + u32 port_speed; + u32 msg_enable; + u32 sig_comp_iv; + u32 state; + u8 phy_link; + u8 full_duplex; + u8 autoneg; + u8 num_def_qps; + wait_queue_head_t swqe_avail_wq; + wait_queue_head_t restart_wq; +}; + +struct port_res_cfg { + int max_entries_rcq; + int max_entries_scq; + int max_entries_sq; + int max_entries_rq1; + int max_entries_rq2; + int max_entries_rq3; +}; + +enum ehea_flag_bits { + __EHEA_STOP_XFER, + __EHEA_DISABLE_PORT_RESET +}; + +void ehea_set_ethtool_ops(struct net_device *netdev); +int ehea_sense_port_attr(struct ehea_port *port); +int ehea_set_portspeed(struct ehea_port *port, u32 port_speed); + +#endif /* __EHEA_H__ */ diff --git a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c 
b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c new file mode 100644 index 000000000..1db5b6790 --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * linux/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "ehea.h" +#include "ehea_phyp.h" + +static int ehea_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct ehea_port *port = netdev_priv(dev); + u32 supported, advertising; + u32 speed; + int ret; + + ret = ehea_sense_port_attr(port); + + if (ret) + return ret; + + if (netif_carrier_ok(dev)) { + switch (port->port_speed) { + case EHEA_SPEED_10M: + speed = SPEED_10; + break; + case EHEA_SPEED_100M: + speed = SPEED_100; + break; + case EHEA_SPEED_1G: + speed = SPEED_1000; + break; + case EHEA_SPEED_10G: + speed = SPEED_10000; + break; + default: + speed = -1; + break; /* BUG */ + } + cmd->base.duplex = port->full_duplex == 1 ? + DUPLEX_FULL : DUPLEX_HALF; + } else { + speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + } + cmd->base.speed = speed; + + if (cmd->base.speed == SPEED_10000) { + supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE); + advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); + cmd->base.port = PORT_FIBRE; + } else { + supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full + | SUPPORTED_100baseT_Half | SUPPORTED_10baseT_Full + | SUPPORTED_10baseT_Half | SUPPORTED_Autoneg + | SUPPORTED_TP); + advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg + | ADVERTISED_TP); + cmd->base.port = PORT_TP; + } + + cmd->base.autoneg = port->autoneg == 1 ? 
+ AUTONEG_ENABLE : AUTONEG_DISABLE; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + + return 0; +} + +static int ehea_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct ehea_port *port = netdev_priv(dev); + int ret = 0; + u32 sp; + + if (cmd->base.autoneg == AUTONEG_ENABLE) { + sp = EHEA_SPEED_AUTONEG; + goto doit; + } + + switch (cmd->base.speed) { + case SPEED_10: + if (cmd->base.duplex == DUPLEX_FULL) + sp = H_SPEED_10M_F; + else + sp = H_SPEED_10M_H; + break; + + case SPEED_100: + if (cmd->base.duplex == DUPLEX_FULL) + sp = H_SPEED_100M_F; + else + sp = H_SPEED_100M_H; + break; + + case SPEED_1000: + if (cmd->base.duplex == DUPLEX_FULL) + sp = H_SPEED_1G_F; + else + ret = -EINVAL; + break; + + case SPEED_10000: + if (cmd->base.duplex == DUPLEX_FULL) + sp = H_SPEED_10G_F; + else + ret = -EINVAL; + break; + + default: + ret = -EINVAL; + break; + } + + if (ret) + goto out; +doit: + ret = ehea_set_portspeed(port, sp); + + if (!ret) + netdev_info(dev, + "Port speed successfully set: %dMbps %s Duplex\n", + port->port_speed, + port->full_duplex == 1 ? "Full" : "Half"); +out: + return ret; +} + +static int ehea_nway_reset(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + int ret; + + ret = ehea_set_portspeed(port, EHEA_SPEED_AUTONEG); + + if (!ret) + netdev_info(port->netdev, + "Port speed successfully set: %dMbps %s Duplex\n", + port->port_speed, + port->full_duplex == 1 ? 
"Full" : "Half"); + return ret; +} + +static void ehea_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->version, DRV_VERSION, sizeof(info->version)); +} + +static u32 ehea_get_msglevel(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + return port->msg_enable; +} + +static void ehea_set_msglevel(struct net_device *dev, u32 value) +{ + struct ehea_port *port = netdev_priv(dev); + port->msg_enable = value; +} + +static const char ehea_ethtool_stats_keys[][ETH_GSTRING_LEN] = { + {"sig_comp_iv"}, + {"swqe_refill_th"}, + {"port resets"}, + {"Receive errors"}, + {"TCP cksum errors"}, + {"IP cksum errors"}, + {"Frame cksum errors"}, + {"num SQ stopped"}, + {"PR0 free_swqes"}, + {"PR1 free_swqes"}, + {"PR2 free_swqes"}, + {"PR3 free_swqes"}, + {"PR4 free_swqes"}, + {"PR5 free_swqes"}, + {"PR6 free_swqes"}, + {"PR7 free_swqes"}, + {"PR8 free_swqes"}, + {"PR9 free_swqes"}, + {"PR10 free_swqes"}, + {"PR11 free_swqes"}, + {"PR12 free_swqes"}, + {"PR13 free_swqes"}, + {"PR14 free_swqes"}, + {"PR15 free_swqes"}, +}; + +static void ehea_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + if (stringset == ETH_SS_STATS) { + memcpy(data, &ehea_ethtool_stats_keys, + sizeof(ehea_ethtool_stats_keys)); + } +} + +static int ehea_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ehea_ethtool_stats_keys); + default: + return -EOPNOTSUPP; + } +} + +static void ehea_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + int i, k, tmp; + struct ehea_port *port = netdev_priv(dev); + + for (i = 0; i < ehea_get_sset_count(dev, ETH_SS_STATS); i++) + data[i] = 0; + i = 0; + + data[i++] = port->sig_comp_iv; + data[i++] = port->port_res[0].swqe_refill_th; + data[i++] = port->resets; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += 
port->port_res[k].p_stats.poll_receive_errors; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].p_stats.err_tcp_cksum; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].p_stats.err_ip_cksum; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].p_stats.err_frame_crc; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].p_stats.queue_stopped; + data[i++] = tmp; + + for (k = 0; k < 16; k++) + data[i++] = atomic_read(&port->port_res[k].swqe_avail); +} + +static const struct ethtool_ops ehea_ethtool_ops = { + .get_drvinfo = ehea_get_drvinfo, + .get_msglevel = ehea_get_msglevel, + .set_msglevel = ehea_set_msglevel, + .get_link = ethtool_op_get_link, + .get_strings = ehea_get_strings, + .get_sset_count = ehea_get_sset_count, + .get_ethtool_stats = ehea_get_ethtool_stats, + .nway_reset = ehea_nway_reset, /* Restart autonegotiation */ + .get_link_ksettings = ehea_get_link_ksettings, + .set_link_ksettings = ehea_set_link_ksettings, +}; + +void ehea_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &ehea_ethtool_ops; +} diff --git a/drivers/net/ethernet/ibm/ehea/ehea_hw.h b/drivers/net/ethernet/ibm/ehea/ehea_hw.h new file mode 100644 index 000000000..590933a45 --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_hw.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/drivers/net/ethernet/ibm/ehea/ehea_hw.h + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 
2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + */ + +#ifndef __EHEA_HW_H__ +#define __EHEA_HW_H__ + +#define QPX_SQA_VALUE EHEA_BMASK_IBM(48, 63) +#define QPX_RQ1A_VALUE EHEA_BMASK_IBM(48, 63) +#define QPX_RQ2A_VALUE EHEA_BMASK_IBM(48, 63) +#define QPX_RQ3A_VALUE EHEA_BMASK_IBM(48, 63) + +#define QPTEMM_OFFSET(x) offsetof(struct ehea_qptemm, x) + +struct ehea_qptemm { + u64 qpx_hcr; + u64 qpx_c; + u64 qpx_herr; + u64 qpx_aer; + u64 qpx_sqa; + u64 qpx_sqc; + u64 qpx_rq1a; + u64 qpx_rq1c; + u64 qpx_st; + u64 qpx_aerr; + u64 qpx_tenure; + u64 qpx_reserved1[(0x098 - 0x058) / 8]; + u64 qpx_portp; + u64 qpx_reserved2[(0x100 - 0x0A0) / 8]; + u64 qpx_t; + u64 qpx_sqhp; + u64 qpx_sqptp; + u64 qpx_reserved3[(0x140 - 0x118) / 8]; + u64 qpx_sqwsize; + u64 qpx_reserved4[(0x170 - 0x148) / 8]; + u64 qpx_sqsize; + u64 qpx_reserved5[(0x1B0 - 0x178) / 8]; + u64 qpx_sigt; + u64 qpx_wqecnt; + u64 qpx_rq1hp; + u64 qpx_rq1ptp; + u64 qpx_rq1size; + u64 qpx_reserved6[(0x220 - 0x1D8) / 8]; + u64 qpx_rq1wsize; + u64 qpx_reserved7[(0x240 - 0x228) / 8]; + u64 qpx_pd; + u64 qpx_scqn; + u64 qpx_rcqn; + u64 qpx_aeqn; + u64 reserved49; + u64 qpx_ram; + u64 qpx_reserved8[(0x300 - 0x270) / 8]; + u64 qpx_rq2a; + u64 qpx_rq2c; + u64 qpx_rq2hp; + u64 qpx_rq2ptp; + u64 qpx_rq2size; + u64 qpx_rq2wsize; + u64 qpx_rq2th; + u64 qpx_rq3a; + u64 qpx_rq3c; + u64 qpx_rq3hp; + u64 qpx_rq3ptp; + u64 qpx_rq3size; + u64 qpx_rq3wsize; + u64 qpx_rq3th; + u64 qpx_lpn; + u64 qpx_reserved9[(0x400 - 0x378) / 8]; + u64 reserved_ext[(0x500 - 0x400) / 8]; + u64 reserved2[(0x1000 - 0x500) / 8]; +}; + +#define MRx_HCR_LPARID_VALID EHEA_BMASK_IBM(0, 0) + +#define MRMWMM_OFFSET(x) offsetof(struct ehea_mrmwmm, x) + +struct ehea_mrmwmm { + u64 mrx_hcr; + u64 mrx_c; + u64 mrx_herr; + u64 mrx_aer; + u64 mrx_pp; + u64 reserved1; + u64 reserved2; + u64 reserved3; + u64 reserved4[(0x200 - 0x40) / 8]; + u64 mrx_ctl[64]; +}; + +#define 
QPEDMM_OFFSET(x) offsetof(struct ehea_qpedmm, x) + +struct ehea_qpedmm { + + u64 reserved0[(0x400) / 8]; + u64 qpedx_phh; + u64 qpedx_ppsgp; + u64 qpedx_ppsgu; + u64 qpedx_ppdgp; + u64 qpedx_ppdgu; + u64 qpedx_aph; + u64 qpedx_apsgp; + u64 qpedx_apsgu; + u64 qpedx_apdgp; + u64 qpedx_apdgu; + u64 qpedx_apav; + u64 qpedx_apsav; + u64 qpedx_hcr; + u64 reserved1[4]; + u64 qpedx_rrl0; + u64 qpedx_rrrkey0; + u64 qpedx_rrva0; + u64 reserved2; + u64 qpedx_rrl1; + u64 qpedx_rrrkey1; + u64 qpedx_rrva1; + u64 reserved3; + u64 qpedx_rrl2; + u64 qpedx_rrrkey2; + u64 qpedx_rrva2; + u64 reserved4; + u64 qpedx_rrl3; + u64 qpedx_rrrkey3; + u64 qpedx_rrva3; +}; + +#define CQX_FECADDER EHEA_BMASK_IBM(32, 63) +#define CQX_FEC_CQE_CNT EHEA_BMASK_IBM(32, 63) +#define CQX_N1_GENERATE_COMP_EVENT EHEA_BMASK_IBM(0, 0) +#define CQX_EP_EVENT_PENDING EHEA_BMASK_IBM(0, 0) + +#define CQTEMM_OFFSET(x) offsetof(struct ehea_cqtemm, x) + +struct ehea_cqtemm { + u64 cqx_hcr; + u64 cqx_c; + u64 cqx_herr; + u64 cqx_aer; + u64 cqx_ptp; + u64 cqx_tp; + u64 cqx_fec; + u64 cqx_feca; + u64 cqx_ep; + u64 cqx_eq; + u64 reserved1; + u64 cqx_n0; + u64 cqx_n1; + u64 reserved2[(0x1000 - 0x60) / 8]; +}; + +#define EQTEMM_OFFSET(x) offsetof(struct ehea_eqtemm, x) + +struct ehea_eqtemm { + u64 eqx_hcr; + u64 eqx_c; + u64 eqx_herr; + u64 eqx_aer; + u64 eqx_ptp; + u64 eqx_tp; + u64 eqx_ssba; + u64 eqx_psba; + u64 eqx_cec; + u64 eqx_meql; + u64 eqx_xisbi; + u64 eqx_xisc; + u64 eqx_it; +}; + +/* + * These access functions will be changed when the dissuccsion about + * the new access methods for POWER has settled. 
+ */ + +static inline u64 epa_load(struct h_epa epa, u32 offset) +{ + return __raw_readq((void __iomem *)(epa.addr + offset)); +} + +static inline void epa_store(struct h_epa epa, u32 offset, u64 value) +{ + __raw_writeq(value, (void __iomem *)(epa.addr + offset)); + epa_load(epa, offset); /* synchronize explicitly to eHEA */ +} + +static inline void epa_store_acc(struct h_epa epa, u32 offset, u64 value) +{ + __raw_writeq(value, (void __iomem *)(epa.addr + offset)); +} + +#define epa_store_cq(epa, offset, value)\ + epa_store(epa, CQTEMM_OFFSET(offset), value) +#define epa_load_cq(epa, offset)\ + epa_load(epa, CQTEMM_OFFSET(offset)) + +static inline void ehea_update_sqa(struct ehea_qp *qp, u16 nr_wqes) +{ + struct h_epa epa = qp->epas.kernel; + epa_store_acc(epa, QPTEMM_OFFSET(qpx_sqa), + EHEA_BMASK_SET(QPX_SQA_VALUE, nr_wqes)); +} + +static inline void ehea_update_rq3a(struct ehea_qp *qp, u16 nr_wqes) +{ + struct h_epa epa = qp->epas.kernel; + epa_store_acc(epa, QPTEMM_OFFSET(qpx_rq3a), + EHEA_BMASK_SET(QPX_RQ1A_VALUE, nr_wqes)); +} + +static inline void ehea_update_rq2a(struct ehea_qp *qp, u16 nr_wqes) +{ + struct h_epa epa = qp->epas.kernel; + epa_store_acc(epa, QPTEMM_OFFSET(qpx_rq2a), + EHEA_BMASK_SET(QPX_RQ2A_VALUE, nr_wqes)); +} + +static inline void ehea_update_rq1a(struct ehea_qp *qp, u16 nr_wqes) +{ + struct h_epa epa = qp->epas.kernel; + epa_store_acc(epa, QPTEMM_OFFSET(qpx_rq1a), + EHEA_BMASK_SET(QPX_RQ3A_VALUE, nr_wqes)); +} + +static inline void ehea_update_feca(struct ehea_cq *cq, u32 nr_cqes) +{ + struct h_epa epa = cq->epas.kernel; + epa_store_acc(epa, CQTEMM_OFFSET(cqx_feca), + EHEA_BMASK_SET(CQX_FECADDER, nr_cqes)); +} + +static inline void ehea_reset_cq_n1(struct ehea_cq *cq) +{ + struct h_epa epa = cq->epas.kernel; + epa_store_cq(epa, cqx_n1, + EHEA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, 1)); +} + +static inline void ehea_reset_cq_ep(struct ehea_cq *my_cq) +{ + struct h_epa epa = my_cq->epas.kernel; + epa_store_acc(epa, CQTEMM_OFFSET(cqx_ep), + 
EHEA_BMASK_SET(CQX_EP_EVENT_PENDING, 0)); +} + +#endif /* __EHEA_HW_H__ */ diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c new file mode 100644 index 000000000..b4aff59b3 --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -0,0 +1,3582 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * linux/drivers/net/ethernet/ibm/ehea/ehea_main.c + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/device.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/if_ether.h> +#include <linux/notifier.h> +#include <linux/reboot.h> +#include <linux/memory.h> +#include <asm/kexec.h> +#include <linux/mutex.h> +#include <linux/prefetch.h> +#include <linux/of.h> +#include <linux/of_device.h> + +#include <net/ip.h> + +#include "ehea.h" +#include "ehea_qmr.h" +#include "ehea_phyp.h" + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); +MODULE_DESCRIPTION("IBM eServer HEA Driver"); +MODULE_VERSION(DRV_VERSION); + + +static int msg_level = -1; +static int rq1_entries = EHEA_DEF_ENTRIES_RQ1; +static int rq2_entries = EHEA_DEF_ENTRIES_RQ2; +static int rq3_entries = EHEA_DEF_ENTRIES_RQ3; +static int sq_entries = EHEA_DEF_ENTRIES_SQ; +static int use_mcs = 1; +static int prop_carrier_state; + +module_param(msg_level, int, 0); +module_param(rq1_entries, int, 0); +module_param(rq2_entries, int, 0); +module_param(rq3_entries, int, 0); +module_param(sq_entries, int, 0); +module_param(prop_carrier_state, int, 0); +module_param(use_mcs, int, 0); + +MODULE_PARM_DESC(msg_level, "msg_level"); 
+MODULE_PARM_DESC(prop_carrier_state, "Propagate carrier state of physical " + "port to stack. 1:yes, 0:no. Default = 0 "); +MODULE_PARM_DESC(rq3_entries, "Number of entries for Receive Queue 3 " + "[2^x - 1], x = [7..14]. Default = " + __MODULE_STRING(EHEA_DEF_ENTRIES_RQ3) ")"); +MODULE_PARM_DESC(rq2_entries, "Number of entries for Receive Queue 2 " + "[2^x - 1], x = [7..14]. Default = " + __MODULE_STRING(EHEA_DEF_ENTRIES_RQ2) ")"); +MODULE_PARM_DESC(rq1_entries, "Number of entries for Receive Queue 1 " + "[2^x - 1], x = [7..14]. Default = " + __MODULE_STRING(EHEA_DEF_ENTRIES_RQ1) ")"); +MODULE_PARM_DESC(sq_entries, " Number of entries for the Send Queue " + "[2^x - 1], x = [7..14]. Default = " + __MODULE_STRING(EHEA_DEF_ENTRIES_SQ) ")"); +MODULE_PARM_DESC(use_mcs, " Multiple receive queues, 1: enable, 0: disable, " + "Default = 1"); + +static int port_name_cnt; +static LIST_HEAD(adapter_list); +static unsigned long ehea_driver_flags; +static DEFINE_MUTEX(dlpar_mem_lock); +static struct ehea_fw_handle_array ehea_fw_handles; +static struct ehea_bcmc_reg_array ehea_bcmc_regs; + + +static int ehea_probe_adapter(struct platform_device *dev); + +static int ehea_remove(struct platform_device *dev); + +static const struct of_device_id ehea_module_device_table[] = { + { + .name = "lhea", + .compatible = "IBM,lhea", + }, + { + .type = "network", + .compatible = "IBM,lhea-ethernet", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, ehea_module_device_table); + +static const struct of_device_id ehea_device_table[] = { + { + .name = "lhea", + .compatible = "IBM,lhea", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, ehea_device_table); + +static struct platform_driver ehea_driver = { + .driver = { + .name = "ehea", + .owner = THIS_MODULE, + .of_match_table = ehea_device_table, + }, + .probe = ehea_probe_adapter, + .remove = ehea_remove, +}; + +void ehea_dump(void *adr, int len, char *msg) +{ + int x; + unsigned char *deb = adr; + for (x = 0; x < len; x += 16) { + pr_info("%s adr=%p ofs=%04x 
%016llx %016llx\n", + msg, deb, x, *((u64 *)&deb[0]), *((u64 *)&deb[8])); + deb += 16; + } +} + +static void ehea_schedule_port_reset(struct ehea_port *port) +{ + if (!test_bit(__EHEA_DISABLE_PORT_RESET, &port->flags)) + schedule_work(&port->reset_task); +} + +static void ehea_update_firmware_handles(void) +{ + struct ehea_fw_handle_entry *arr = NULL; + struct ehea_adapter *adapter; + int num_adapters = 0; + int num_ports = 0; + int num_portres = 0; + int i = 0; + int num_fw_handles, k, l; + + /* Determine number of handles */ + mutex_lock(&ehea_fw_handles.lock); + + list_for_each_entry(adapter, &adapter_list, list) { + num_adapters++; + + for (k = 0; k < EHEA_MAX_PORTS; k++) { + struct ehea_port *port = adapter->port[k]; + + if (!port || (port->state != EHEA_PORT_UP)) + continue; + + num_ports++; + num_portres += port->num_def_qps; + } + } + + num_fw_handles = num_adapters * EHEA_NUM_ADAPTER_FW_HANDLES + + num_ports * EHEA_NUM_PORT_FW_HANDLES + + num_portres * EHEA_NUM_PORTRES_FW_HANDLES; + + if (num_fw_handles) { + arr = kcalloc(num_fw_handles, sizeof(*arr), GFP_KERNEL); + if (!arr) + goto out; /* Keep the existing array */ + } else + goto out_update; + + list_for_each_entry(adapter, &adapter_list, list) { + if (num_adapters == 0) + break; + + for (k = 0; k < EHEA_MAX_PORTS; k++) { + struct ehea_port *port = adapter->port[k]; + + if (!port || (port->state != EHEA_PORT_UP) || + (num_ports == 0)) + continue; + + for (l = 0; l < port->num_def_qps; l++) { + struct ehea_port_res *pr = &port->port_res[l]; + + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->qp->fw_handle; + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->send_cq->fw_handle; + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->recv_cq->fw_handle; + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->eq->fw_handle; + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->send_mr.handle; + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->recv_mr.handle; + } + arr[i].adh = adapter->handle; + arr[i++].fwh 
= port->qp_eq->fw_handle; + num_ports--; + } + + arr[i].adh = adapter->handle; + arr[i++].fwh = adapter->neq->fw_handle; + + if (adapter->mr.handle) { + arr[i].adh = adapter->handle; + arr[i++].fwh = adapter->mr.handle; + } + num_adapters--; + } + +out_update: + kfree(ehea_fw_handles.arr); + ehea_fw_handles.arr = arr; + ehea_fw_handles.num_entries = i; +out: + mutex_unlock(&ehea_fw_handles.lock); +} + +static void ehea_update_bcmc_registrations(void) +{ + unsigned long flags; + struct ehea_bcmc_reg_entry *arr = NULL; + struct ehea_adapter *adapter; + struct ehea_mc_list *mc_entry; + int num_registrations = 0; + int i = 0; + int k; + + spin_lock_irqsave(&ehea_bcmc_regs.lock, flags); + + /* Determine number of registrations */ + list_for_each_entry(adapter, &adapter_list, list) + for (k = 0; k < EHEA_MAX_PORTS; k++) { + struct ehea_port *port = adapter->port[k]; + + if (!port || (port->state != EHEA_PORT_UP)) + continue; + + num_registrations += 2; /* Broadcast registrations */ + + list_for_each_entry(mc_entry, &port->mc_list->list,list) + num_registrations += 2; + } + + if (num_registrations) { + arr = kcalloc(num_registrations, sizeof(*arr), GFP_ATOMIC); + if (!arr) + goto out; /* Keep the existing array */ + } else + goto out_update; + + list_for_each_entry(adapter, &adapter_list, list) { + for (k = 0; k < EHEA_MAX_PORTS; k++) { + struct ehea_port *port = adapter->port[k]; + + if (!port || (port->state != EHEA_PORT_UP)) + continue; + + if (num_registrations == 0) + goto out_update; + + arr[i].adh = adapter->handle; + arr[i].port_id = port->logical_port_id; + arr[i].reg_type = EHEA_BCMC_BROADCAST | + EHEA_BCMC_UNTAGGED; + arr[i++].macaddr = port->mac_addr; + + arr[i].adh = adapter->handle; + arr[i].port_id = port->logical_port_id; + arr[i].reg_type = EHEA_BCMC_BROADCAST | + EHEA_BCMC_VLANID_ALL; + arr[i++].macaddr = port->mac_addr; + num_registrations -= 2; + + list_for_each_entry(mc_entry, + &port->mc_list->list, list) { + if (num_registrations == 0) + goto 
out_update; + + arr[i].adh = adapter->handle; + arr[i].port_id = port->logical_port_id; + arr[i].reg_type = EHEA_BCMC_MULTICAST | + EHEA_BCMC_UNTAGGED; + if (mc_entry->macaddr == 0) + arr[i].reg_type |= EHEA_BCMC_SCOPE_ALL; + arr[i++].macaddr = mc_entry->macaddr; + + arr[i].adh = adapter->handle; + arr[i].port_id = port->logical_port_id; + arr[i].reg_type = EHEA_BCMC_MULTICAST | + EHEA_BCMC_VLANID_ALL; + if (mc_entry->macaddr == 0) + arr[i].reg_type |= EHEA_BCMC_SCOPE_ALL; + arr[i++].macaddr = mc_entry->macaddr; + num_registrations -= 2; + } + } + } + +out_update: + kfree(ehea_bcmc_regs.arr); + ehea_bcmc_regs.arr = arr; + ehea_bcmc_regs.num_entries = i; +out: + spin_unlock_irqrestore(&ehea_bcmc_regs.lock, flags); +} + +static void ehea_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct ehea_port *port = netdev_priv(dev); + u64 rx_packets = 0, tx_packets = 0, rx_bytes = 0, tx_bytes = 0; + int i; + + for (i = 0; i < port->num_def_qps; i++) { + rx_packets += port->port_res[i].rx_packets; + rx_bytes += port->port_res[i].rx_bytes; + } + + for (i = 0; i < port->num_def_qps; i++) { + tx_packets += port->port_res[i].tx_packets; + tx_bytes += port->port_res[i].tx_bytes; + } + + stats->tx_packets = tx_packets; + stats->rx_bytes = rx_bytes; + stats->tx_bytes = tx_bytes; + stats->rx_packets = rx_packets; + + stats->multicast = port->stats.multicast; + stats->rx_errors = port->stats.rx_errors; +} + +static void ehea_update_stats(struct work_struct *work) +{ + struct ehea_port *port = + container_of(work, struct ehea_port, stats_work.work); + struct net_device *dev = port->netdev; + struct rtnl_link_stats64 *stats = &port->stats; + struct hcp_ehea_port_cb2 *cb2; + u64 hret; + + cb2 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb2) { + netdev_err(dev, "No mem for cb2. 
Some interface statistics were not updated\n"); + goto resched; + } + + hret = ehea_h_query_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB2, H_PORT_CB2_ALL, cb2); + if (hret != H_SUCCESS) { + netdev_err(dev, "query_ehea_port failed\n"); + goto out_herr; + } + + if (netif_msg_hw(port)) + ehea_dump(cb2, sizeof(*cb2), "net_device_stats"); + + stats->multicast = cb2->rxmcp; + stats->rx_errors = cb2->rxuerr; + +out_herr: + free_page((unsigned long)cb2); +resched: + schedule_delayed_work(&port->stats_work, + round_jiffies_relative(msecs_to_jiffies(1000))); +} + +static void ehea_refill_rq1(struct ehea_port_res *pr, int index, int nr_of_wqes) +{ + struct sk_buff **skb_arr_rq1 = pr->rq1_skba.arr; + struct net_device *dev = pr->port->netdev; + int max_index_mask = pr->rq1_skba.len - 1; + int fill_wqes = pr->rq1_skba.os_skbs + nr_of_wqes; + int adder = 0; + int i; + + pr->rq1_skba.os_skbs = 0; + + if (unlikely(test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))) { + if (nr_of_wqes > 0) + pr->rq1_skba.index = index; + pr->rq1_skba.os_skbs = fill_wqes; + return; + } + + for (i = 0; i < fill_wqes; i++) { + if (!skb_arr_rq1[index]) { + skb_arr_rq1[index] = netdev_alloc_skb(dev, + EHEA_L_PKT_SIZE); + if (!skb_arr_rq1[index]) { + pr->rq1_skba.os_skbs = fill_wqes - i; + break; + } + } + index--; + index &= max_index_mask; + adder++; + } + + if (adder == 0) + return; + + /* Ring doorbell */ + ehea_update_rq1a(pr->qp, adder); +} + +static void ehea_init_fill_rq1(struct ehea_port_res *pr, int nr_rq1a) +{ + struct sk_buff **skb_arr_rq1 = pr->rq1_skba.arr; + struct net_device *dev = pr->port->netdev; + int i; + + if (nr_rq1a > pr->rq1_skba.len) { + netdev_err(dev, "NR_RQ1A bigger than skb array len\n"); + return; + } + + for (i = 0; i < nr_rq1a; i++) { + skb_arr_rq1[i] = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); + if (!skb_arr_rq1[i]) + break; + } + /* Ring doorbell */ + ehea_update_rq1a(pr->qp, i - 1); +} + +static int ehea_refill_rq_def(struct ehea_port_res *pr, + 
struct ehea_q_skb_arr *q_skba, int rq_nr, + int num_wqes, int wqe_type, int packet_size) +{ + struct net_device *dev = pr->port->netdev; + struct ehea_qp *qp = pr->qp; + struct sk_buff **skb_arr = q_skba->arr; + struct ehea_rwqe *rwqe; + int i, index, max_index_mask, fill_wqes; + int adder = 0; + int ret = 0; + + fill_wqes = q_skba->os_skbs + num_wqes; + q_skba->os_skbs = 0; + + if (unlikely(test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))) { + q_skba->os_skbs = fill_wqes; + return ret; + } + + index = q_skba->index; + max_index_mask = q_skba->len - 1; + for (i = 0; i < fill_wqes; i++) { + u64 tmp_addr; + struct sk_buff *skb; + + skb = netdev_alloc_skb_ip_align(dev, packet_size); + if (!skb) { + q_skba->os_skbs = fill_wqes - i; + if (q_skba->os_skbs == q_skba->len - 2) { + netdev_info(pr->port->netdev, + "rq%i ran dry - no mem for skb\n", + rq_nr); + ret = -ENOMEM; + } + break; + } + + skb_arr[index] = skb; + tmp_addr = ehea_map_vaddr(skb->data); + if (tmp_addr == -1) { + dev_consume_skb_any(skb); + q_skba->os_skbs = fill_wqes - i; + ret = 0; + break; + } + + rwqe = ehea_get_next_rwqe(qp, rq_nr); + rwqe->wr_id = EHEA_BMASK_SET(EHEA_WR_ID_TYPE, wqe_type) + | EHEA_BMASK_SET(EHEA_WR_ID_INDEX, index); + rwqe->sg_list[0].l_key = pr->recv_mr.lkey; + rwqe->sg_list[0].vaddr = tmp_addr; + rwqe->sg_list[0].len = packet_size; + rwqe->data_segments = 1; + + index++; + index &= max_index_mask; + adder++; + } + + q_skba->index = index; + if (adder == 0) + goto out; + + /* Ring doorbell */ + iosync(); + if (rq_nr == 2) + ehea_update_rq2a(pr->qp, adder); + else + ehea_update_rq3a(pr->qp, adder); +out: + return ret; +} + + +static int ehea_refill_rq2(struct ehea_port_res *pr, int nr_of_wqes) +{ + return ehea_refill_rq_def(pr, &pr->rq2_skba, 2, + nr_of_wqes, EHEA_RWQE2_TYPE, + EHEA_RQ2_PKT_SIZE); +} + + +static int ehea_refill_rq3(struct ehea_port_res *pr, int nr_of_wqes) +{ + return ehea_refill_rq_def(pr, &pr->rq3_skba, 3, + nr_of_wqes, EHEA_RWQE3_TYPE, + EHEA_MAX_PACKET_SIZE); +} + 
+static inline int ehea_check_cqe(struct ehea_cqe *cqe, int *rq_num) +{ + *rq_num = (cqe->type & EHEA_CQE_TYPE_RQ) >> 5; + if ((cqe->status & EHEA_CQE_STAT_ERR_MASK) == 0) + return 0; + if (((cqe->status & EHEA_CQE_STAT_ERR_TCP) != 0) && + (cqe->header_length == 0)) + return 0; + return -EINVAL; +} + +static inline void ehea_fill_skb(struct net_device *dev, + struct sk_buff *skb, struct ehea_cqe *cqe, + struct ehea_port_res *pr) +{ + int length = cqe->num_bytes_transfered - 4; /*remove CRC */ + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, dev); + + /* The packet was not an IPV4 packet so a complemented checksum was + calculated. The value is found in the Internet Checksum field. */ + if (cqe->status & EHEA_CQE_BLIND_CKSUM) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold(~cqe->inet_checksum_value); + } else + skb->ip_summed = CHECKSUM_UNNECESSARY; + + skb_record_rx_queue(skb, pr - &pr->port->port_res[0]); +} + +static inline struct sk_buff *get_skb_by_index(struct sk_buff **skb_array, + int arr_len, + struct ehea_cqe *cqe) +{ + int skb_index = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, cqe->wr_id); + struct sk_buff *skb; + void *pref; + int x; + + x = skb_index + 1; + x &= (arr_len - 1); + + pref = skb_array[x]; + if (pref) { + prefetchw(pref); + prefetchw(pref + EHEA_CACHE_LINE); + + pref = (skb_array[x]->data); + prefetch(pref); + prefetch(pref + EHEA_CACHE_LINE); + prefetch(pref + EHEA_CACHE_LINE * 2); + prefetch(pref + EHEA_CACHE_LINE * 3); + } + + skb = skb_array[skb_index]; + skb_array[skb_index] = NULL; + return skb; +} + +static inline struct sk_buff *get_skb_by_index_ll(struct sk_buff **skb_array, + int arr_len, int wqe_index) +{ + struct sk_buff *skb; + void *pref; + int x; + + x = wqe_index + 1; + x &= (arr_len - 1); + + pref = skb_array[x]; + if (pref) { + prefetchw(pref); + prefetchw(pref + EHEA_CACHE_LINE); + + pref = (skb_array[x]->data); + prefetchw(pref); + prefetchw(pref + EHEA_CACHE_LINE); + } + + skb = 
skb_array[wqe_index]; + skb_array[wqe_index] = NULL; + return skb; +} + +static int ehea_treat_poll_error(struct ehea_port_res *pr, int rq, + struct ehea_cqe *cqe, int *processed_rq2, + int *processed_rq3) +{ + struct sk_buff *skb; + + if (cqe->status & EHEA_CQE_STAT_ERR_TCP) + pr->p_stats.err_tcp_cksum++; + if (cqe->status & EHEA_CQE_STAT_ERR_IP) + pr->p_stats.err_ip_cksum++; + if (cqe->status & EHEA_CQE_STAT_ERR_CRC) + pr->p_stats.err_frame_crc++; + + if (rq == 2) { + *processed_rq2 += 1; + skb = get_skb_by_index(pr->rq2_skba.arr, pr->rq2_skba.len, cqe); + dev_kfree_skb(skb); + } else if (rq == 3) { + *processed_rq3 += 1; + skb = get_skb_by_index(pr->rq3_skba.arr, pr->rq3_skba.len, cqe); + dev_kfree_skb(skb); + } + + if (cqe->status & EHEA_CQE_STAT_FAT_ERR_MASK) { + if (netif_msg_rx_err(pr->port)) { + pr_err("Critical receive error for QP %d. Resetting port.\n", + pr->qp->init_attr.qp_nr); + ehea_dump(cqe, sizeof(*cqe), "CQE"); + } + ehea_schedule_port_reset(pr->port); + return 1; + } + + return 0; +} + +static int ehea_proc_rwqes(struct net_device *dev, + struct ehea_port_res *pr, + int budget) +{ + struct ehea_port *port = pr->port; + struct ehea_qp *qp = pr->qp; + struct ehea_cqe *cqe; + struct sk_buff *skb; + struct sk_buff **skb_arr_rq1 = pr->rq1_skba.arr; + struct sk_buff **skb_arr_rq2 = pr->rq2_skba.arr; + struct sk_buff **skb_arr_rq3 = pr->rq3_skba.arr; + int skb_arr_rq1_len = pr->rq1_skba.len; + int skb_arr_rq2_len = pr->rq2_skba.len; + int skb_arr_rq3_len = pr->rq3_skba.len; + int processed, processed_rq1, processed_rq2, processed_rq3; + u64 processed_bytes = 0; + int wqe_index, last_wqe_index, rq, port_reset; + + processed = processed_rq1 = processed_rq2 = processed_rq3 = 0; + last_wqe_index = 0; + + cqe = ehea_poll_rq1(qp, &wqe_index); + while ((processed < budget) && cqe) { + ehea_inc_rq1(qp); + processed_rq1++; + processed++; + if (netif_msg_rx_status(port)) + ehea_dump(cqe, sizeof(*cqe), "CQE"); + + last_wqe_index = wqe_index; + rmb(); + if 
(!ehea_check_cqe(cqe, &rq)) { + if (rq == 1) { + /* LL RQ1 */ + skb = get_skb_by_index_ll(skb_arr_rq1, + skb_arr_rq1_len, + wqe_index); + if (unlikely(!skb)) { + netif_info(port, rx_err, dev, + "LL rq1: skb=NULL\n"); + + skb = netdev_alloc_skb(dev, + EHEA_L_PKT_SIZE); + if (!skb) + break; + } + skb_copy_to_linear_data(skb, ((char *)cqe) + 64, + cqe->num_bytes_transfered - 4); + ehea_fill_skb(dev, skb, cqe, pr); + } else if (rq == 2) { + /* RQ2 */ + skb = get_skb_by_index(skb_arr_rq2, + skb_arr_rq2_len, cqe); + if (unlikely(!skb)) { + netif_err(port, rx_err, dev, + "rq2: skb=NULL\n"); + break; + } + ehea_fill_skb(dev, skb, cqe, pr); + processed_rq2++; + } else { + /* RQ3 */ + skb = get_skb_by_index(skb_arr_rq3, + skb_arr_rq3_len, cqe); + if (unlikely(!skb)) { + netif_err(port, rx_err, dev, + "rq3: skb=NULL\n"); + break; + } + ehea_fill_skb(dev, skb, cqe, pr); + processed_rq3++; + } + + processed_bytes += skb->len; + + if (cqe->status & EHEA_CQE_VLAN_TAG_XTRACT) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + cqe->vlan_tag); + + napi_gro_receive(&pr->napi, skb); + } else { + pr->p_stats.poll_receive_errors++; + port_reset = ehea_treat_poll_error(pr, rq, cqe, + &processed_rq2, + &processed_rq3); + if (port_reset) + break; + } + cqe = ehea_poll_rq1(qp, &wqe_index); + } + + pr->rx_packets += processed; + pr->rx_bytes += processed_bytes; + + ehea_refill_rq1(pr, last_wqe_index, processed_rq1); + ehea_refill_rq2(pr, processed_rq2); + ehea_refill_rq3(pr, processed_rq3); + + return processed; +} + +#define SWQE_RESTART_CHECK 0xdeadbeaff00d0000ull + +static void reset_sq_restart_flag(struct ehea_port *port) +{ + int i; + + for (i = 0; i < port->num_def_qps; i++) { + struct ehea_port_res *pr = &port->port_res[i]; + pr->sq_restart_flag = 0; + } + wake_up(&port->restart_wq); +} + +static void check_sqs(struct ehea_port *port) +{ + struct ehea_swqe *swqe; + int swqe_index; + int i; + + for (i = 0; i < port->num_def_qps; i++) { + struct ehea_port_res *pr = &port->port_res[i]; 
+ int ret; + swqe = ehea_get_swqe(pr->qp, &swqe_index); + memset(swqe, 0, SWQE_HEADER_SIZE); + atomic_dec(&pr->swqe_avail); + + swqe->tx_control |= EHEA_SWQE_PURGE; + swqe->wr_id = SWQE_RESTART_CHECK; + swqe->tx_control |= EHEA_SWQE_SIGNALLED_COMPLETION; + swqe->tx_control |= EHEA_SWQE_IMM_DATA_PRESENT; + swqe->immediate_data_length = 80; + + ehea_post_swqe(pr->qp, swqe); + + ret = wait_event_timeout(port->restart_wq, + pr->sq_restart_flag == 0, + msecs_to_jiffies(100)); + + if (!ret) { + pr_err("HW/SW queues out of sync\n"); + ehea_schedule_port_reset(pr->port); + return; + } + } +} + + +static struct ehea_cqe *ehea_proc_cqes(struct ehea_port_res *pr, int my_quota) +{ + struct sk_buff *skb; + struct ehea_cq *send_cq = pr->send_cq; + struct ehea_cqe *cqe; + int quota = my_quota; + int cqe_counter = 0; + int swqe_av = 0; + int index; + struct netdev_queue *txq = netdev_get_tx_queue(pr->port->netdev, + pr - &pr->port->port_res[0]); + + cqe = ehea_poll_cq(send_cq); + while (cqe && (quota > 0)) { + ehea_inc_cq(send_cq); + + cqe_counter++; + rmb(); + + if (cqe->wr_id == SWQE_RESTART_CHECK) { + pr->sq_restart_flag = 1; + swqe_av++; + break; + } + + if (cqe->status & EHEA_CQE_STAT_ERR_MASK) { + pr_err("Bad send completion status=0x%04X\n", + cqe->status); + + if (netif_msg_tx_err(pr->port)) + ehea_dump(cqe, sizeof(*cqe), "Send CQE"); + + if (cqe->status & EHEA_CQE_STAT_RESET_MASK) { + pr_err("Resetting port\n"); + ehea_schedule_port_reset(pr->port); + break; + } + } + + if (netif_msg_tx_done(pr->port)) + ehea_dump(cqe, sizeof(*cqe), "CQE"); + + if (likely(EHEA_BMASK_GET(EHEA_WR_ID_TYPE, cqe->wr_id) + == EHEA_SWQE2_TYPE)) { + + index = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, cqe->wr_id); + skb = pr->sq_skba.arr[index]; + dev_consume_skb_any(skb); + pr->sq_skba.arr[index] = NULL; + } + + swqe_av += EHEA_BMASK_GET(EHEA_WR_ID_REFILL, cqe->wr_id); + quota--; + + cqe = ehea_poll_cq(send_cq); + } + + ehea_update_feca(send_cq, cqe_counter); + atomic_add(swqe_av, &pr->swqe_avail); + + if 
(unlikely(netif_tx_queue_stopped(txq) && + (atomic_read(&pr->swqe_avail) >= pr->swqe_refill_th))) { + __netif_tx_lock(txq, smp_processor_id()); + if (netif_tx_queue_stopped(txq) && + (atomic_read(&pr->swqe_avail) >= pr->swqe_refill_th)) + netif_tx_wake_queue(txq); + __netif_tx_unlock(txq); + } + + wake_up(&pr->port->swqe_avail_wq); + + return cqe; +} + +#define EHEA_POLL_MAX_CQES 65535 + +static int ehea_poll(struct napi_struct *napi, int budget) +{ + struct ehea_port_res *pr = container_of(napi, struct ehea_port_res, + napi); + struct net_device *dev = pr->port->netdev; + struct ehea_cqe *cqe; + struct ehea_cqe *cqe_skb = NULL; + int wqe_index; + int rx = 0; + + cqe_skb = ehea_proc_cqes(pr, EHEA_POLL_MAX_CQES); + rx += ehea_proc_rwqes(dev, pr, budget - rx); + + while (rx != budget) { + napi_complete(napi); + ehea_reset_cq_ep(pr->recv_cq); + ehea_reset_cq_ep(pr->send_cq); + ehea_reset_cq_n1(pr->recv_cq); + ehea_reset_cq_n1(pr->send_cq); + rmb(); + cqe = ehea_poll_rq1(pr->qp, &wqe_index); + cqe_skb = ehea_poll_cq(pr->send_cq); + + if (!cqe && !cqe_skb) + return rx; + + if (!napi_reschedule(napi)) + return rx; + + cqe_skb = ehea_proc_cqes(pr, EHEA_POLL_MAX_CQES); + rx += ehea_proc_rwqes(dev, pr, budget - rx); + } + + return rx; +} + +static irqreturn_t ehea_recv_irq_handler(int irq, void *param) +{ + struct ehea_port_res *pr = param; + + napi_schedule(&pr->napi); + + return IRQ_HANDLED; +} + +static irqreturn_t ehea_qp_aff_irq_handler(int irq, void *param) +{ + struct ehea_port *port = param; + struct ehea_eqe *eqe; + struct ehea_qp *qp; + u32 qp_token; + u64 resource_type, aer, aerr; + int reset_port = 0; + + eqe = ehea_poll_eq(port->qp_eq); + + while (eqe) { + qp_token = EHEA_BMASK_GET(EHEA_EQE_QP_TOKEN, eqe->entry); + pr_err("QP aff_err: entry=0x%llx, token=0x%x\n", + eqe->entry, qp_token); + + qp = port->port_res[qp_token].qp; + + resource_type = ehea_error_data(port->adapter, qp->fw_handle, + &aer, &aerr); + + if (resource_type == EHEA_AER_RESTYPE_QP) { + if 
((aer & EHEA_AER_RESET_MASK) || + (aerr & EHEA_AERR_RESET_MASK)) + reset_port = 1; + } else + reset_port = 1; /* Reset in case of CQ or EQ error */ + + eqe = ehea_poll_eq(port->qp_eq); + } + + if (reset_port) { + pr_err("Resetting port\n"); + ehea_schedule_port_reset(port); + } + + return IRQ_HANDLED; +} + +static struct ehea_port *ehea_get_port(struct ehea_adapter *adapter, + int logical_port) +{ + int i; + + for (i = 0; i < EHEA_MAX_PORTS; i++) + if (adapter->port[i]) + if (adapter->port[i]->logical_port_id == logical_port) + return adapter->port[i]; + return NULL; +} + +int ehea_sense_port_attr(struct ehea_port *port) +{ + int ret; + u64 hret; + struct hcp_ehea_port_cb0 *cb0; + + /* may be called via ehea_neq_tasklet() */ + cb0 = (void *)get_zeroed_page(GFP_ATOMIC); + if (!cb0) { + pr_err("no mem for cb0\n"); + ret = -ENOMEM; + goto out; + } + + hret = ehea_h_query_ehea_port(port->adapter->handle, + port->logical_port_id, H_PORT_CB0, + EHEA_BMASK_SET(H_PORT_CB0_ALL, 0xFFFF), + cb0); + if (hret != H_SUCCESS) { + ret = -EIO; + goto out_free; + } + + /* MAC address */ + port->mac_addr = cb0->port_mac_addr << 16; + + if (!is_valid_ether_addr((u8 *)&port->mac_addr)) { + ret = -EADDRNOTAVAIL; + goto out_free; + } + + /* Port speed */ + switch (cb0->port_speed) { + case H_SPEED_10M_H: + port->port_speed = EHEA_SPEED_10M; + port->full_duplex = 0; + break; + case H_SPEED_10M_F: + port->port_speed = EHEA_SPEED_10M; + port->full_duplex = 1; + break; + case H_SPEED_100M_H: + port->port_speed = EHEA_SPEED_100M; + port->full_duplex = 0; + break; + case H_SPEED_100M_F: + port->port_speed = EHEA_SPEED_100M; + port->full_duplex = 1; + break; + case H_SPEED_1G_F: + port->port_speed = EHEA_SPEED_1G; + port->full_duplex = 1; + break; + case H_SPEED_10G_F: + port->port_speed = EHEA_SPEED_10G; + port->full_duplex = 1; + break; + default: + port->port_speed = 0; + port->full_duplex = 0; + break; + } + + port->autoneg = 1; + port->num_mcs = cb0->num_default_qps; + + /* Number of 
default QPs */ + if (use_mcs) + port->num_def_qps = cb0->num_default_qps; + else + port->num_def_qps = 1; + + if (!port->num_def_qps) { + ret = -EINVAL; + goto out_free; + } + + ret = 0; +out_free: + if (ret || netif_msg_probe(port)) + ehea_dump(cb0, sizeof(*cb0), "ehea_sense_port_attr"); + free_page((unsigned long)cb0); +out: + return ret; +} + +int ehea_set_portspeed(struct ehea_port *port, u32 port_speed) +{ + struct hcp_ehea_port_cb4 *cb4; + u64 hret; + int ret = 0; + + cb4 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb4) { + pr_err("no mem for cb4\n"); + ret = -ENOMEM; + goto out; + } + + cb4->port_speed = port_speed; + + netif_carrier_off(port->netdev); + + hret = ehea_h_modify_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB4, H_PORT_CB4_SPEED, cb4); + if (hret == H_SUCCESS) { + port->autoneg = port_speed == EHEA_SPEED_AUTONEG ? 1 : 0; + + hret = ehea_h_query_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB4, H_PORT_CB4_SPEED, + cb4); + if (hret == H_SUCCESS) { + switch (cb4->port_speed) { + case H_SPEED_10M_H: + port->port_speed = EHEA_SPEED_10M; + port->full_duplex = 0; + break; + case H_SPEED_10M_F: + port->port_speed = EHEA_SPEED_10M; + port->full_duplex = 1; + break; + case H_SPEED_100M_H: + port->port_speed = EHEA_SPEED_100M; + port->full_duplex = 0; + break; + case H_SPEED_100M_F: + port->port_speed = EHEA_SPEED_100M; + port->full_duplex = 1; + break; + case H_SPEED_1G_F: + port->port_speed = EHEA_SPEED_1G; + port->full_duplex = 1; + break; + case H_SPEED_10G_F: + port->port_speed = EHEA_SPEED_10G; + port->full_duplex = 1; + break; + default: + port->port_speed = 0; + port->full_duplex = 0; + break; + } + } else { + pr_err("Failed sensing port speed\n"); + ret = -EIO; + } + } else { + if (hret == H_AUTHORITY) { + pr_info("Hypervisor denied setting port speed\n"); + ret = -EPERM; + } else { + ret = -EIO; + pr_err("Failed setting port speed\n"); + } + } + if (!prop_carrier_state || (port->phy_link == 
EHEA_PHY_LINK_UP)) + netif_carrier_on(port->netdev); + + free_page((unsigned long)cb4); +out: + return ret; +} + +static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe) +{ + int ret; + u8 ec; + u8 portnum; + struct ehea_port *port; + struct net_device *dev; + + ec = EHEA_BMASK_GET(NEQE_EVENT_CODE, eqe); + portnum = EHEA_BMASK_GET(NEQE_PORTNUM, eqe); + port = ehea_get_port(adapter, portnum); + if (!port) { + netdev_err(NULL, "unknown portnum %x\n", portnum); + return; + } + dev = port->netdev; + + switch (ec) { + case EHEA_EC_PORTSTATE_CHG: /* port state change */ + + if (EHEA_BMASK_GET(NEQE_PORT_UP, eqe)) { + if (!netif_carrier_ok(dev)) { + ret = ehea_sense_port_attr(port); + if (ret) { + netdev_err(dev, "failed resensing port attributes\n"); + break; + } + + netif_info(port, link, dev, + "Logical port up: %dMbps %s Duplex\n", + port->port_speed, + port->full_duplex == 1 ? + "Full" : "Half"); + + netif_carrier_on(dev); + netif_wake_queue(dev); + } + } else + if (netif_carrier_ok(dev)) { + netif_info(port, link, dev, + "Logical port down\n"); + netif_carrier_off(dev); + netif_tx_disable(dev); + } + + if (EHEA_BMASK_GET(NEQE_EXTSWITCH_PORT_UP, eqe)) { + port->phy_link = EHEA_PHY_LINK_UP; + netif_info(port, link, dev, + "Physical port up\n"); + if (prop_carrier_state) + netif_carrier_on(dev); + } else { + port->phy_link = EHEA_PHY_LINK_DOWN; + netif_info(port, link, dev, + "Physical port down\n"); + if (prop_carrier_state) + netif_carrier_off(dev); + } + + if (EHEA_BMASK_GET(NEQE_EXTSWITCH_PRIMARY, eqe)) + netdev_info(dev, + "External switch port is primary port\n"); + else + netdev_info(dev, + "External switch port is backup port\n"); + + break; + case EHEA_EC_ADAPTER_MALFUNC: + netdev_err(dev, "Adapter malfunction\n"); + break; + case EHEA_EC_PORT_MALFUNC: + netdev_info(dev, "Port malfunction\n"); + netif_carrier_off(dev); + netif_tx_disable(dev); + break; + default: + netdev_err(dev, "unknown event code %x, eqe=0x%llX\n", ec, eqe); + break; + } +} + 
+static void ehea_neq_tasklet(struct tasklet_struct *t) +{ + struct ehea_adapter *adapter = from_tasklet(adapter, t, neq_tasklet); + struct ehea_eqe *eqe; + u64 event_mask; + + eqe = ehea_poll_eq(adapter->neq); + pr_debug("eqe=%p\n", eqe); + + while (eqe) { + pr_debug("*eqe=%lx\n", (unsigned long) eqe->entry); + ehea_parse_eqe(adapter, eqe->entry); + eqe = ehea_poll_eq(adapter->neq); + pr_debug("next eqe=%p\n", eqe); + } + + event_mask = EHEA_BMASK_SET(NELR_PORTSTATE_CHG, 1) + | EHEA_BMASK_SET(NELR_ADAPTER_MALFUNC, 1) + | EHEA_BMASK_SET(NELR_PORT_MALFUNC, 1); + + ehea_h_reset_events(adapter->handle, + adapter->neq->fw_handle, event_mask); +} + +static irqreturn_t ehea_interrupt_neq(int irq, void *param) +{ + struct ehea_adapter *adapter = param; + tasklet_hi_schedule(&adapter->neq_tasklet); + return IRQ_HANDLED; +} + + +static int ehea_fill_port_res(struct ehea_port_res *pr) +{ + int ret; + struct ehea_qp_init_attr *init_attr = &pr->qp->init_attr; + + ehea_init_fill_rq1(pr, pr->rq1_skba.len); + + ret = ehea_refill_rq2(pr, init_attr->act_nr_rwqes_rq2 - 1); + + ret |= ehea_refill_rq3(pr, init_attr->act_nr_rwqes_rq3 - 1); + + return ret; +} + +static int ehea_reg_interrupts(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_port_res *pr; + int i, ret; + + + snprintf(port->int_aff_name, EHEA_IRQ_NAME_SIZE - 1, "%s-aff", + dev->name); + + ret = ibmebus_request_irq(port->qp_eq->attr.ist1, + ehea_qp_aff_irq_handler, + 0, port->int_aff_name, port); + if (ret) { + netdev_err(dev, "failed registering irq for qp_aff_irq_handler:ist=%X\n", + port->qp_eq->attr.ist1); + goto out_free_qpeq; + } + + netif_info(port, ifup, dev, + "irq_handle 0x%X for function qp_aff_irq_handler registered\n", + port->qp_eq->attr.ist1); + + + for (i = 0; i < port->num_def_qps; i++) { + pr = &port->port_res[i]; + snprintf(pr->int_send_name, EHEA_IRQ_NAME_SIZE - 1, + "%s-queue%d", dev->name, i); + ret = ibmebus_request_irq(pr->eq->attr.ist1, + ehea_recv_irq_handler, 
+ 0, pr->int_send_name, pr); + if (ret) { + netdev_err(dev, "failed registering irq for ehea_queue port_res_nr:%d, ist=%X\n", + i, pr->eq->attr.ist1); + goto out_free_req; + } + netif_info(port, ifup, dev, + "irq_handle 0x%X for function ehea_queue_int %d registered\n", + pr->eq->attr.ist1, i); + } +out: + return ret; + + +out_free_req: + while (--i >= 0) { + u32 ist = port->port_res[i].eq->attr.ist1; + ibmebus_free_irq(ist, &port->port_res[i]); + } + +out_free_qpeq: + ibmebus_free_irq(port->qp_eq->attr.ist1, port); + i = port->num_def_qps; + + goto out; + +} + +static void ehea_free_interrupts(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_port_res *pr; + int i; + + /* send */ + + for (i = 0; i < port->num_def_qps; i++) { + pr = &port->port_res[i]; + ibmebus_free_irq(pr->eq->attr.ist1, pr); + netif_info(port, intr, dev, + "free send irq for res %d with handle 0x%X\n", + i, pr->eq->attr.ist1); + } + + /* associated events */ + ibmebus_free_irq(port->qp_eq->attr.ist1, port); + netif_info(port, intr, dev, + "associated event interrupt for handle 0x%X freed\n", + port->qp_eq->attr.ist1); +} + +static int ehea_configure_port(struct ehea_port *port) +{ + int ret, i; + u64 hret, mask; + struct hcp_ehea_port_cb0 *cb0; + + ret = -ENOMEM; + cb0 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb0) + goto out; + + cb0->port_rc = EHEA_BMASK_SET(PXLY_RC_VALID, 1) + | EHEA_BMASK_SET(PXLY_RC_IP_CHKSUM, 1) + | EHEA_BMASK_SET(PXLY_RC_TCP_UDP_CHKSUM, 1) + | EHEA_BMASK_SET(PXLY_RC_VLAN_XTRACT, 1) + | EHEA_BMASK_SET(PXLY_RC_VLAN_TAG_FILTER, + PXLY_RC_VLAN_FILTER) + | EHEA_BMASK_SET(PXLY_RC_JUMBO_FRAME, 1); + + for (i = 0; i < port->num_mcs; i++) + if (use_mcs) + cb0->default_qpn_arr[i] = + port->port_res[i].qp->init_attr.qp_nr; + else + cb0->default_qpn_arr[i] = + port->port_res[0].qp->init_attr.qp_nr; + + if (netif_msg_ifup(port)) + ehea_dump(cb0, sizeof(*cb0), "ehea_configure_port"); + + mask = EHEA_BMASK_SET(H_PORT_CB0_PRC, 1) + | 
EHEA_BMASK_SET(H_PORT_CB0_DEFQPNARRAY, 1); + + hret = ehea_h_modify_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB0, mask, cb0); + ret = -EIO; + if (hret != H_SUCCESS) + goto out_free; + + ret = 0; + +out_free: + free_page((unsigned long)cb0); +out: + return ret; +} + +static int ehea_gen_smrs(struct ehea_port_res *pr) +{ + int ret; + struct ehea_adapter *adapter = pr->port->adapter; + + ret = ehea_gen_smr(adapter, &adapter->mr, &pr->send_mr); + if (ret) + goto out; + + ret = ehea_gen_smr(adapter, &adapter->mr, &pr->recv_mr); + if (ret) + goto out_free; + + return 0; + +out_free: + ehea_rem_mr(&pr->send_mr); +out: + pr_err("Generating SMRS failed\n"); + return -EIO; +} + +static int ehea_rem_smrs(struct ehea_port_res *pr) +{ + if ((ehea_rem_mr(&pr->send_mr)) || + (ehea_rem_mr(&pr->recv_mr))) + return -EIO; + else + return 0; +} + +static int ehea_init_q_skba(struct ehea_q_skb_arr *q_skba, int max_q_entries) +{ + int arr_size = sizeof(void *) * max_q_entries; + + q_skba->arr = vzalloc(arr_size); + if (!q_skba->arr) + return -ENOMEM; + + q_skba->len = max_q_entries; + q_skba->index = 0; + q_skba->os_skbs = 0; + + return 0; +} + +static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr, + struct port_res_cfg *pr_cfg, int queue_token) +{ + struct ehea_adapter *adapter = port->adapter; + enum ehea_eq_type eq_type = EHEA_EQ; + struct ehea_qp_init_attr *init_attr = NULL; + int ret = -EIO; + u64 tx_bytes, rx_bytes, tx_packets, rx_packets; + + tx_bytes = pr->tx_bytes; + tx_packets = pr->tx_packets; + rx_bytes = pr->rx_bytes; + rx_packets = pr->rx_packets; + + memset(pr, 0, sizeof(struct ehea_port_res)); + + pr->tx_bytes = tx_bytes; + pr->tx_packets = tx_packets; + pr->rx_bytes = rx_bytes; + pr->rx_packets = rx_packets; + + pr->port = port; + + pr->eq = ehea_create_eq(adapter, eq_type, EHEA_MAX_ENTRIES_EQ, 0); + if (!pr->eq) { + pr_err("create_eq failed (eq)\n"); + goto out_free; + } + + pr->recv_cq = ehea_create_cq(adapter, 
pr_cfg->max_entries_rcq, + pr->eq->fw_handle, + port->logical_port_id); + if (!pr->recv_cq) { + pr_err("create_cq failed (cq_recv)\n"); + goto out_free; + } + + pr->send_cq = ehea_create_cq(adapter, pr_cfg->max_entries_scq, + pr->eq->fw_handle, + port->logical_port_id); + if (!pr->send_cq) { + pr_err("create_cq failed (cq_send)\n"); + goto out_free; + } + + if (netif_msg_ifup(port)) + pr_info("Send CQ: act_nr_cqes=%d, Recv CQ: act_nr_cqes=%d\n", + pr->send_cq->attr.act_nr_of_cqes, + pr->recv_cq->attr.act_nr_of_cqes); + + init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); + if (!init_attr) { + ret = -ENOMEM; + pr_err("no mem for ehea_qp_init_attr\n"); + goto out_free; + } + + init_attr->low_lat_rq1 = 1; + init_attr->signalingtype = 1; /* generate CQE if specified in WQE */ + init_attr->rq_count = 3; + init_attr->qp_token = queue_token; + init_attr->max_nr_send_wqes = pr_cfg->max_entries_sq; + init_attr->max_nr_rwqes_rq1 = pr_cfg->max_entries_rq1; + init_attr->max_nr_rwqes_rq2 = pr_cfg->max_entries_rq2; + init_attr->max_nr_rwqes_rq3 = pr_cfg->max_entries_rq3; + init_attr->wqe_size_enc_sq = EHEA_SG_SQ; + init_attr->wqe_size_enc_rq1 = EHEA_SG_RQ1; + init_attr->wqe_size_enc_rq2 = EHEA_SG_RQ2; + init_attr->wqe_size_enc_rq3 = EHEA_SG_RQ3; + init_attr->rq2_threshold = EHEA_RQ2_THRESHOLD; + init_attr->rq3_threshold = EHEA_RQ3_THRESHOLD; + init_attr->port_nr = port->logical_port_id; + init_attr->send_cq_handle = pr->send_cq->fw_handle; + init_attr->recv_cq_handle = pr->recv_cq->fw_handle; + init_attr->aff_eq_handle = port->qp_eq->fw_handle; + + pr->qp = ehea_create_qp(adapter, adapter->pd, init_attr); + if (!pr->qp) { + pr_err("create_qp failed\n"); + ret = -EIO; + goto out_free; + } + + if (netif_msg_ifup(port)) + pr_info("QP: qp_nr=%d\n act_nr_snd_wqe=%d\n nr_rwqe_rq1=%d\n nr_rwqe_rq2=%d\n nr_rwqe_rq3=%d\n", + init_attr->qp_nr, + init_attr->act_nr_send_wqes, + init_attr->act_nr_rwqes_rq1, + init_attr->act_nr_rwqes_rq2, + init_attr->act_nr_rwqes_rq3); + + pr->sq_skba_size 
= init_attr->act_nr_send_wqes + 1; + + ret = ehea_init_q_skba(&pr->sq_skba, pr->sq_skba_size); + ret |= ehea_init_q_skba(&pr->rq1_skba, init_attr->act_nr_rwqes_rq1 + 1); + ret |= ehea_init_q_skba(&pr->rq2_skba, init_attr->act_nr_rwqes_rq2 + 1); + ret |= ehea_init_q_skba(&pr->rq3_skba, init_attr->act_nr_rwqes_rq3 + 1); + if (ret) + goto out_free; + + pr->swqe_refill_th = init_attr->act_nr_send_wqes / 10; + if (ehea_gen_smrs(pr) != 0) { + ret = -EIO; + goto out_free; + } + + atomic_set(&pr->swqe_avail, init_attr->act_nr_send_wqes - 1); + + kfree(init_attr); + + netif_napi_add(pr->port->netdev, &pr->napi, ehea_poll); + + ret = 0; + goto out; + +out_free: + kfree(init_attr); + vfree(pr->sq_skba.arr); + vfree(pr->rq1_skba.arr); + vfree(pr->rq2_skba.arr); + vfree(pr->rq3_skba.arr); + ehea_destroy_qp(pr->qp); + ehea_destroy_cq(pr->send_cq); + ehea_destroy_cq(pr->recv_cq); + ehea_destroy_eq(pr->eq); +out: + return ret; +} + +static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr) +{ + int ret, i; + + if (pr->qp) + netif_napi_del(&pr->napi); + + ret = ehea_destroy_qp(pr->qp); + + if (!ret) { + ehea_destroy_cq(pr->send_cq); + ehea_destroy_cq(pr->recv_cq); + ehea_destroy_eq(pr->eq); + + for (i = 0; i < pr->rq1_skba.len; i++) + dev_kfree_skb(pr->rq1_skba.arr[i]); + + for (i = 0; i < pr->rq2_skba.len; i++) + dev_kfree_skb(pr->rq2_skba.arr[i]); + + for (i = 0; i < pr->rq3_skba.len; i++) + dev_kfree_skb(pr->rq3_skba.arr[i]); + + for (i = 0; i < pr->sq_skba.len; i++) + dev_kfree_skb(pr->sq_skba.arr[i]); + + vfree(pr->rq1_skba.arr); + vfree(pr->rq2_skba.arr); + vfree(pr->rq3_skba.arr); + vfree(pr->sq_skba.arr); + ret = ehea_rem_smrs(pr); + } + return ret; +} + +static void write_swqe2_immediate(struct sk_buff *skb, struct ehea_swqe *swqe, + u32 lkey) +{ + int skb_data_size = skb_headlen(skb); + u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0]; + struct ehea_vsgentry *sg1entry = &swqe->u.immdata_desc.sg_entry; + unsigned int immediate_len = 
SWQE2_MAX_IMM; + + swqe->descriptors = 0; + + if (skb_is_gso(skb)) { + swqe->tx_control |= EHEA_SWQE_TSO; + swqe->mss = skb_shinfo(skb)->gso_size; + /* + * For TSO packets we only copy the headers into the + * immediate area. + */ + immediate_len = skb_tcp_all_headers(skb); + } + + if (skb_is_gso(skb) || skb_data_size >= SWQE2_MAX_IMM) { + skb_copy_from_linear_data(skb, imm_data, immediate_len); + swqe->immediate_data_length = immediate_len; + + if (skb_data_size > immediate_len) { + sg1entry->l_key = lkey; + sg1entry->len = skb_data_size - immediate_len; + sg1entry->vaddr = + ehea_map_vaddr(skb->data + immediate_len); + swqe->descriptors++; + } + } else { + skb_copy_from_linear_data(skb, imm_data, skb_data_size); + swqe->immediate_data_length = skb_data_size; + } +} + +static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev, + struct ehea_swqe *swqe, u32 lkey) +{ + struct ehea_vsgentry *sg_list, *sg1entry, *sgentry; + skb_frag_t *frag; + int nfrags, sg1entry_contains_frag_data, i; + + nfrags = skb_shinfo(skb)->nr_frags; + sg1entry = &swqe->u.immdata_desc.sg_entry; + sg_list = (struct ehea_vsgentry *)&swqe->u.immdata_desc.sg_list; + sg1entry_contains_frag_data = 0; + + write_swqe2_immediate(skb, swqe, lkey); + + /* write descriptors */ + if (nfrags > 0) { + if (swqe->descriptors == 0) { + /* sg1entry not yet used */ + frag = &skb_shinfo(skb)->frags[0]; + + /* copy sg1entry data */ + sg1entry->l_key = lkey; + sg1entry->len = skb_frag_size(frag); + sg1entry->vaddr = + ehea_map_vaddr(skb_frag_address(frag)); + swqe->descriptors++; + sg1entry_contains_frag_data = 1; + } + + for (i = sg1entry_contains_frag_data; i < nfrags; i++) { + + frag = &skb_shinfo(skb)->frags[i]; + sgentry = &sg_list[i - sg1entry_contains_frag_data]; + + sgentry->l_key = lkey; + sgentry->len = skb_frag_size(frag); + sgentry->vaddr = ehea_map_vaddr(skb_frag_address(frag)); + swqe->descriptors++; + } + } +} + +static int ehea_broadcast_reg_helper(struct ehea_port *port, u32 
hcallid) +{ + int ret = 0; + u64 hret; + u8 reg_type; + + /* De/Register untagged packets */ + reg_type = EHEA_BCMC_BROADCAST | EHEA_BCMC_UNTAGGED; + hret = ehea_h_reg_dereg_bcmc(port->adapter->handle, + port->logical_port_id, + reg_type, port->mac_addr, 0, hcallid); + if (hret != H_SUCCESS) { + pr_err("%sregistering bc address failed (tagged)\n", + hcallid == H_REG_BCMC ? "" : "de"); + ret = -EIO; + goto out_herr; + } + + /* De/Register VLAN packets */ + reg_type = EHEA_BCMC_BROADCAST | EHEA_BCMC_VLANID_ALL; + hret = ehea_h_reg_dereg_bcmc(port->adapter->handle, + port->logical_port_id, + reg_type, port->mac_addr, 0, hcallid); + if (hret != H_SUCCESS) { + pr_err("%sregistering bc address failed (vlan)\n", + hcallid == H_REG_BCMC ? "" : "de"); + ret = -EIO; + } +out_herr: + return ret; +} + +static int ehea_set_mac_addr(struct net_device *dev, void *sa) +{ + struct ehea_port *port = netdev_priv(dev); + struct sockaddr *mac_addr = sa; + struct hcp_ehea_port_cb0 *cb0; + int ret; + u64 hret; + + if (!is_valid_ether_addr(mac_addr->sa_data)) { + ret = -EADDRNOTAVAIL; + goto out; + } + + cb0 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb0) { + pr_err("no mem for cb0\n"); + ret = -ENOMEM; + goto out; + } + + memcpy(&(cb0->port_mac_addr), &(mac_addr->sa_data[0]), ETH_ALEN); + + cb0->port_mac_addr = cb0->port_mac_addr >> 16; + + hret = ehea_h_modify_ehea_port(port->adapter->handle, + port->logical_port_id, H_PORT_CB0, + EHEA_BMASK_SET(H_PORT_CB0_MAC, 1), cb0); + if (hret != H_SUCCESS) { + ret = -EIO; + goto out_free; + } + + eth_hw_addr_set(dev, mac_addr->sa_data); + + /* Deregister old MAC in pHYP */ + if (port->state == EHEA_PORT_UP) { + ret = ehea_broadcast_reg_helper(port, H_DEREG_BCMC); + if (ret) + goto out_upregs; + } + + port->mac_addr = cb0->port_mac_addr << 16; + + /* Register new MAC in pHYP */ + if (port->state == EHEA_PORT_UP) { + ret = ehea_broadcast_reg_helper(port, H_REG_BCMC); + if (ret) + goto out_upregs; + } + + ret = 0; + +out_upregs: + 
ehea_update_bcmc_registrations(); +out_free: + free_page((unsigned long)cb0); +out: + return ret; +} + +static void ehea_promiscuous_error(u64 hret, int enable) +{ + if (hret == H_AUTHORITY) + pr_info("Hypervisor denied %sabling promiscuous mode\n", + enable == 1 ? "en" : "dis"); + else + pr_err("failed %sabling promiscuous mode\n", + enable == 1 ? "en" : "dis"); +} + +static void ehea_promiscuous(struct net_device *dev, int enable) +{ + struct ehea_port *port = netdev_priv(dev); + struct hcp_ehea_port_cb7 *cb7; + u64 hret; + + if (enable == port->promisc) + return; + + cb7 = (void *)get_zeroed_page(GFP_ATOMIC); + if (!cb7) { + pr_err("no mem for cb7\n"); + goto out; + } + + /* Modify Pxs_DUCQPN in CB7 */ + cb7->def_uc_qpn = enable == 1 ? port->port_res[0].qp->fw_handle : 0; + + hret = ehea_h_modify_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB7, H_PORT_CB7_DUCQPN, cb7); + if (hret) { + ehea_promiscuous_error(hret, enable); + goto out; + } + + port->promisc = enable; +out: + free_page((unsigned long)cb7); +} + +static u64 ehea_multicast_reg_helper(struct ehea_port *port, u64 mc_mac_addr, + u32 hcallid) +{ + u64 hret; + u8 reg_type; + + reg_type = EHEA_BCMC_MULTICAST | EHEA_BCMC_UNTAGGED; + if (mc_mac_addr == 0) + reg_type |= EHEA_BCMC_SCOPE_ALL; + + hret = ehea_h_reg_dereg_bcmc(port->adapter->handle, + port->logical_port_id, + reg_type, mc_mac_addr, 0, hcallid); + if (hret) + goto out; + + reg_type = EHEA_BCMC_MULTICAST | EHEA_BCMC_VLANID_ALL; + if (mc_mac_addr == 0) + reg_type |= EHEA_BCMC_SCOPE_ALL; + + hret = ehea_h_reg_dereg_bcmc(port->adapter->handle, + port->logical_port_id, + reg_type, mc_mac_addr, 0, hcallid); +out: + return hret; +} + +static int ehea_drop_multicast_list(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_mc_list *mc_entry = port->mc_list; + struct list_head *pos; + struct list_head *temp; + int ret = 0; + u64 hret; + + list_for_each_safe(pos, temp, &(port->mc_list->list)) { + 
mc_entry = list_entry(pos, struct ehea_mc_list, list); + + hret = ehea_multicast_reg_helper(port, mc_entry->macaddr, + H_DEREG_BCMC); + if (hret) { + pr_err("failed deregistering mcast MAC\n"); + ret = -EIO; + } + + list_del(pos); + kfree(mc_entry); + } + return ret; +} + +static void ehea_allmulti(struct net_device *dev, int enable) +{ + struct ehea_port *port = netdev_priv(dev); + u64 hret; + + if (!port->allmulti) { + if (enable) { + /* Enable ALLMULTI */ + ehea_drop_multicast_list(dev); + hret = ehea_multicast_reg_helper(port, 0, H_REG_BCMC); + if (!hret) + port->allmulti = 1; + else + netdev_err(dev, + "failed enabling IFF_ALLMULTI\n"); + } + } else { + if (!enable) { + /* Disable ALLMULTI */ + hret = ehea_multicast_reg_helper(port, 0, H_DEREG_BCMC); + if (!hret) + port->allmulti = 0; + else + netdev_err(dev, + "failed disabling IFF_ALLMULTI\n"); + } + } +} + +static void ehea_add_multicast_entry(struct ehea_port *port, u8 *mc_mac_addr) +{ + struct ehea_mc_list *ehea_mcl_entry; + u64 hret; + + ehea_mcl_entry = kzalloc(sizeof(*ehea_mcl_entry), GFP_ATOMIC); + if (!ehea_mcl_entry) + return; + + INIT_LIST_HEAD(&ehea_mcl_entry->list); + + memcpy(&ehea_mcl_entry->macaddr, mc_mac_addr, ETH_ALEN); + + hret = ehea_multicast_reg_helper(port, ehea_mcl_entry->macaddr, + H_REG_BCMC); + if (!hret) + list_add(&ehea_mcl_entry->list, &port->mc_list->list); + else { + pr_err("failed registering mcast MAC\n"); + kfree(ehea_mcl_entry); + } +} + +static void ehea_set_multicast_list(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct netdev_hw_addr *ha; + int ret; + + ehea_promiscuous(dev, !!(dev->flags & IFF_PROMISC)); + + if (dev->flags & IFF_ALLMULTI) { + ehea_allmulti(dev, 1); + goto out; + } + ehea_allmulti(dev, 0); + + if (!netdev_mc_empty(dev)) { + ret = ehea_drop_multicast_list(dev); + if (ret) { + /* Dropping the current multicast list failed. + * Enabling ALL_MULTI is the best we can do. 
+ */ + ehea_allmulti(dev, 1); + } + + if (netdev_mc_count(dev) > port->adapter->max_mc_mac) { + pr_info("Mcast registration limit reached (0x%llx). Use ALLMULTI!\n", + port->adapter->max_mc_mac); + goto out; + } + + netdev_for_each_mc_addr(ha, dev) + ehea_add_multicast_entry(port, ha->addr); + + } +out: + ehea_update_bcmc_registrations(); +} + +static void xmit_common(struct sk_buff *skb, struct ehea_swqe *swqe) +{ + swqe->tx_control |= EHEA_SWQE_IMM_DATA_PRESENT | EHEA_SWQE_CRC; + + if (vlan_get_protocol(skb) != htons(ETH_P_IP)) + return; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + swqe->tx_control |= EHEA_SWQE_IP_CHECKSUM; + + swqe->ip_start = skb_network_offset(skb); + swqe->ip_end = swqe->ip_start + ip_hdrlen(skb) - 1; + + switch (ip_hdr(skb)->protocol) { + case IPPROTO_UDP: + if (skb->ip_summed == CHECKSUM_PARTIAL) + swqe->tx_control |= EHEA_SWQE_TCP_CHECKSUM; + + swqe->tcp_offset = swqe->ip_end + 1 + + offsetof(struct udphdr, check); + break; + + case IPPROTO_TCP: + if (skb->ip_summed == CHECKSUM_PARTIAL) + swqe->tx_control |= EHEA_SWQE_TCP_CHECKSUM; + + swqe->tcp_offset = swqe->ip_end + 1 + + offsetof(struct tcphdr, check); + break; + } +} + +static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev, + struct ehea_swqe *swqe, u32 lkey) +{ + swqe->tx_control |= EHEA_SWQE_DESCRIPTORS_PRESENT; + + xmit_common(skb, swqe); + + write_swqe2_data(skb, dev, swqe, lkey); +} + +static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev, + struct ehea_swqe *swqe) +{ + u8 *imm_data = &swqe->u.immdata_nodesc.immediate_data[0]; + + xmit_common(skb, swqe); + + if (!skb->data_len) + skb_copy_from_linear_data(skb, imm_data, skb->len); + else + skb_copy_bits(skb, 0, imm_data, skb->len); + + swqe->immediate_data_length = skb->len; + dev_consume_skb_any(skb); +} + +static netdev_tx_t ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_swqe *swqe; + u32 lkey; + int swqe_index; + struct 
ehea_port_res *pr; + struct netdev_queue *txq; + + pr = &port->port_res[skb_get_queue_mapping(skb)]; + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + + swqe = ehea_get_swqe(pr->qp, &swqe_index); + memset(swqe, 0, SWQE_HEADER_SIZE); + atomic_dec(&pr->swqe_avail); + + if (skb_vlan_tag_present(skb)) { + swqe->tx_control |= EHEA_SWQE_VLAN_INSERT; + swqe->vlan_tag = skb_vlan_tag_get(skb); + } + + pr->tx_packets++; + pr->tx_bytes += skb->len; + + if (skb->len <= SWQE3_MAX_IMM) { + u32 sig_iv = port->sig_comp_iv; + u32 swqe_num = pr->swqe_id_counter; + ehea_xmit3(skb, dev, swqe); + swqe->wr_id = EHEA_BMASK_SET(EHEA_WR_ID_TYPE, EHEA_SWQE3_TYPE) + | EHEA_BMASK_SET(EHEA_WR_ID_COUNT, swqe_num); + if (pr->swqe_ll_count >= (sig_iv - 1)) { + swqe->wr_id |= EHEA_BMASK_SET(EHEA_WR_ID_REFILL, + sig_iv); + swqe->tx_control |= EHEA_SWQE_SIGNALLED_COMPLETION; + pr->swqe_ll_count = 0; + } else + pr->swqe_ll_count += 1; + } else { + swqe->wr_id = + EHEA_BMASK_SET(EHEA_WR_ID_TYPE, EHEA_SWQE2_TYPE) + | EHEA_BMASK_SET(EHEA_WR_ID_COUNT, pr->swqe_id_counter) + | EHEA_BMASK_SET(EHEA_WR_ID_REFILL, 1) + | EHEA_BMASK_SET(EHEA_WR_ID_INDEX, pr->sq_skba.index); + pr->sq_skba.arr[pr->sq_skba.index] = skb; + + pr->sq_skba.index++; + pr->sq_skba.index &= (pr->sq_skba.len - 1); + + lkey = pr->send_mr.lkey; + ehea_xmit2(skb, dev, swqe, lkey); + swqe->tx_control |= EHEA_SWQE_SIGNALLED_COMPLETION; + } + pr->swqe_id_counter += 1; + + netif_info(port, tx_queued, dev, + "post swqe on QP %d\n", pr->qp->init_attr.qp_nr); + if (netif_msg_tx_queued(port)) + ehea_dump(swqe, 512, "swqe"); + + if (unlikely(test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))) { + netif_tx_stop_queue(txq); + swqe->tx_control |= EHEA_SWQE_PURGE; + } + + ehea_post_swqe(pr->qp, swqe); + + if (unlikely(atomic_read(&pr->swqe_avail) <= 1)) { + pr->p_stats.queue_stopped++; + netif_tx_stop_queue(txq); + } + + return NETDEV_TX_OK; +} + +static int ehea_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct 
ehea_port *port = netdev_priv(dev); + struct ehea_adapter *adapter = port->adapter; + struct hcp_ehea_port_cb1 *cb1; + int index; + u64 hret; + int err = 0; + + cb1 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb1) { + pr_err("no mem for cb1\n"); + err = -ENOMEM; + goto out; + } + + hret = ehea_h_query_ehea_port(adapter->handle, port->logical_port_id, + H_PORT_CB1, H_PORT_CB1_ALL, cb1); + if (hret != H_SUCCESS) { + pr_err("query_ehea_port failed\n"); + err = -EINVAL; + goto out; + } + + index = (vid / 64); + cb1->vlan_filter[index] |= ((u64)(0x8000000000000000 >> (vid & 0x3F))); + + hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id, + H_PORT_CB1, H_PORT_CB1_ALL, cb1); + if (hret != H_SUCCESS) { + pr_err("modify_ehea_port failed\n"); + err = -EINVAL; + } +out: + free_page((unsigned long)cb1); + return err; +} + +static int ehea_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_adapter *adapter = port->adapter; + struct hcp_ehea_port_cb1 *cb1; + int index; + u64 hret; + int err = 0; + + cb1 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb1) { + pr_err("no mem for cb1\n"); + err = -ENOMEM; + goto out; + } + + hret = ehea_h_query_ehea_port(adapter->handle, port->logical_port_id, + H_PORT_CB1, H_PORT_CB1_ALL, cb1); + if (hret != H_SUCCESS) { + pr_err("query_ehea_port failed\n"); + err = -EINVAL; + goto out; + } + + index = (vid / 64); + cb1->vlan_filter[index] &= ~((u64)(0x8000000000000000 >> (vid & 0x3F))); + + hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id, + H_PORT_CB1, H_PORT_CB1_ALL, cb1); + if (hret != H_SUCCESS) { + pr_err("modify_ehea_port failed\n"); + err = -EINVAL; + } +out: + free_page((unsigned long)cb1); + return err; +} + +static int ehea_activate_qp(struct ehea_adapter *adapter, struct ehea_qp *qp) +{ + int ret = -EIO; + u64 hret; + u16 dummy16 = 0; + u64 dummy64 = 0; + struct hcp_modify_qp_cb0 *cb0; + + cb0 = (void 
*)get_zeroed_page(GFP_KERNEL); + if (!cb0) { + ret = -ENOMEM; + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (1)\n"); + goto out; + } + + cb0->qp_ctl_reg = H_QP_CR_STATE_INITIALIZED; + hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0, + &dummy64, &dummy64, &dummy16, &dummy16); + if (hret != H_SUCCESS) { + pr_err("modify_ehea_qp failed (1)\n"); + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (2)\n"); + goto out; + } + + cb0->qp_ctl_reg = H_QP_CR_ENABLED | H_QP_CR_STATE_INITIALIZED; + hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0, + &dummy64, &dummy64, &dummy16, &dummy16); + if (hret != H_SUCCESS) { + pr_err("modify_ehea_qp failed (2)\n"); + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (3)\n"); + goto out; + } + + cb0->qp_ctl_reg = H_QP_CR_ENABLED | H_QP_CR_STATE_RDY2SND; + hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0, + &dummy64, &dummy64, &dummy16, &dummy16); + if (hret != H_SUCCESS) { + pr_err("modify_ehea_qp failed (3)\n"); + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (4)\n"); + goto out; + } + + ret = 0; +out: + free_page((unsigned long)cb0); + return ret; +} + +static int ehea_port_res_setup(struct ehea_port *port, int def_qps) +{ + int ret, i; + struct port_res_cfg pr_cfg, pr_cfg_small_rx; + enum ehea_eq_type eq_type = EHEA_EQ; + 
+ port->qp_eq = ehea_create_eq(port->adapter, eq_type, + EHEA_MAX_ENTRIES_EQ, 1); + if (!port->qp_eq) { + ret = -EINVAL; + pr_err("ehea_create_eq failed (qp_eq)\n"); + goto out_kill_eq; + } + + pr_cfg.max_entries_rcq = rq1_entries + rq2_entries + rq3_entries; + pr_cfg.max_entries_scq = sq_entries * 2; + pr_cfg.max_entries_sq = sq_entries; + pr_cfg.max_entries_rq1 = rq1_entries; + pr_cfg.max_entries_rq2 = rq2_entries; + pr_cfg.max_entries_rq3 = rq3_entries; + + pr_cfg_small_rx.max_entries_rcq = 1; + pr_cfg_small_rx.max_entries_scq = sq_entries; + pr_cfg_small_rx.max_entries_sq = sq_entries; + pr_cfg_small_rx.max_entries_rq1 = 1; + pr_cfg_small_rx.max_entries_rq2 = 1; + pr_cfg_small_rx.max_entries_rq3 = 1; + + for (i = 0; i < def_qps; i++) { + ret = ehea_init_port_res(port, &port->port_res[i], &pr_cfg, i); + if (ret) + goto out_clean_pr; + } + for (i = def_qps; i < def_qps; i++) { + ret = ehea_init_port_res(port, &port->port_res[i], + &pr_cfg_small_rx, i); + if (ret) + goto out_clean_pr; + } + + return 0; + +out_clean_pr: + while (--i >= 0) + ehea_clean_portres(port, &port->port_res[i]); + +out_kill_eq: + ehea_destroy_eq(port->qp_eq); + return ret; +} + +static int ehea_clean_all_portres(struct ehea_port *port) +{ + int ret = 0; + int i; + + for (i = 0; i < port->num_def_qps; i++) + ret |= ehea_clean_portres(port, &port->port_res[i]); + + ret |= ehea_destroy_eq(port->qp_eq); + + return ret; +} + +static void ehea_remove_adapter_mr(struct ehea_adapter *adapter) +{ + if (adapter->active_ports) + return; + + ehea_rem_mr(&adapter->mr); +} + +static int ehea_add_adapter_mr(struct ehea_adapter *adapter) +{ + if (adapter->active_ports) + return 0; + + return ehea_reg_kernel_mr(adapter, &adapter->mr); +} + +static int ehea_up(struct net_device *dev) +{ + int ret, i; + struct ehea_port *port = netdev_priv(dev); + + if (port->state == EHEA_PORT_UP) + return 0; + + ret = ehea_port_res_setup(port, port->num_def_qps); + if (ret) { + netdev_err(dev, "port_res_failed\n"); + goto 
out; + } + + /* Set default QP for this port */ + ret = ehea_configure_port(port); + if (ret) { + netdev_err(dev, "ehea_configure_port failed. ret:%d\n", ret); + goto out_clean_pr; + } + + ret = ehea_reg_interrupts(dev); + if (ret) { + netdev_err(dev, "reg_interrupts failed. ret:%d\n", ret); + goto out_clean_pr; + } + + for (i = 0; i < port->num_def_qps; i++) { + ret = ehea_activate_qp(port->adapter, port->port_res[i].qp); + if (ret) { + netdev_err(dev, "activate_qp failed\n"); + goto out_free_irqs; + } + } + + for (i = 0; i < port->num_def_qps; i++) { + ret = ehea_fill_port_res(&port->port_res[i]); + if (ret) { + netdev_err(dev, "out_free_irqs\n"); + goto out_free_irqs; + } + } + + ret = ehea_broadcast_reg_helper(port, H_REG_BCMC); + if (ret) { + ret = -EIO; + goto out_free_irqs; + } + + port->state = EHEA_PORT_UP; + + ret = 0; + goto out; + +out_free_irqs: + ehea_free_interrupts(dev); + +out_clean_pr: + ehea_clean_all_portres(port); +out: + if (ret) + netdev_info(dev, "Failed starting. 
ret=%i\n", ret); + + ehea_update_bcmc_registrations(); + ehea_update_firmware_handles(); + + return ret; +} + +static void port_napi_disable(struct ehea_port *port) +{ + int i; + + for (i = 0; i < port->num_def_qps; i++) + napi_disable(&port->port_res[i].napi); +} + +static void port_napi_enable(struct ehea_port *port) +{ + int i; + + for (i = 0; i < port->num_def_qps; i++) + napi_enable(&port->port_res[i].napi); +} + +static int ehea_open(struct net_device *dev) +{ + int ret; + struct ehea_port *port = netdev_priv(dev); + + mutex_lock(&port->port_lock); + + netif_info(port, ifup, dev, "enabling port\n"); + + netif_carrier_off(dev); + + ret = ehea_up(dev); + if (!ret) { + port_napi_enable(port); + netif_tx_start_all_queues(dev); + } + + mutex_unlock(&port->port_lock); + schedule_delayed_work(&port->stats_work, + round_jiffies_relative(msecs_to_jiffies(1000))); + + return ret; +} + +static int ehea_down(struct net_device *dev) +{ + int ret; + struct ehea_port *port = netdev_priv(dev); + + if (port->state == EHEA_PORT_DOWN) + return 0; + + ehea_drop_multicast_list(dev); + ehea_allmulti(dev, 0); + ehea_broadcast_reg_helper(port, H_DEREG_BCMC); + + ehea_free_interrupts(dev); + + port->state = EHEA_PORT_DOWN; + + ehea_update_bcmc_registrations(); + + ret = ehea_clean_all_portres(port); + if (ret) + netdev_info(dev, "Failed freeing resources. 
ret=%i\n", ret); + + ehea_update_firmware_handles(); + + return ret; +} + +static int ehea_stop(struct net_device *dev) +{ + int ret; + struct ehea_port *port = netdev_priv(dev); + + netif_info(port, ifdown, dev, "disabling port\n"); + + set_bit(__EHEA_DISABLE_PORT_RESET, &port->flags); + cancel_work_sync(&port->reset_task); + cancel_delayed_work_sync(&port->stats_work); + mutex_lock(&port->port_lock); + netif_tx_stop_all_queues(dev); + port_napi_disable(port); + ret = ehea_down(dev); + mutex_unlock(&port->port_lock); + clear_bit(__EHEA_DISABLE_PORT_RESET, &port->flags); + return ret; +} + +static void ehea_purge_sq(struct ehea_qp *orig_qp) +{ + struct ehea_qp qp = *orig_qp; + struct ehea_qp_init_attr *init_attr = &qp.init_attr; + struct ehea_swqe *swqe; + int wqe_index; + int i; + + for (i = 0; i < init_attr->act_nr_send_wqes; i++) { + swqe = ehea_get_swqe(&qp, &wqe_index); + swqe->tx_control |= EHEA_SWQE_PURGE; + } +} + +static void ehea_flush_sq(struct ehea_port *port) +{ + int i; + + for (i = 0; i < port->num_def_qps; i++) { + struct ehea_port_res *pr = &port->port_res[i]; + int swqe_max = pr->sq_skba_size - 2 - pr->swqe_ll_count; + int ret; + + ret = wait_event_timeout(port->swqe_avail_wq, + atomic_read(&pr->swqe_avail) >= swqe_max, + msecs_to_jiffies(100)); + + if (!ret) { + pr_err("WARNING: sq not flushed completely\n"); + break; + } + } +} + +static int ehea_stop_qps(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_adapter *adapter = port->adapter; + struct hcp_modify_qp_cb0 *cb0; + int ret = -EIO; + int dret; + int i; + u64 hret; + u64 dummy64 = 0; + u16 dummy16 = 0; + + cb0 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb0) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < (port->num_def_qps); i++) { + struct ehea_port_res *pr = &port->port_res[i]; + struct ehea_qp *qp = pr->qp; + + /* Purge send queue */ + ehea_purge_sq(qp); + + /* Disable queue pair */ + hret = ehea_h_query_ehea_qp(adapter->handle, 0, 
qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), + cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (1)\n"); + goto out; + } + + cb0->qp_ctl_reg = (cb0->qp_ctl_reg & H_QP_CR_RES_STATE) << 8; + cb0->qp_ctl_reg &= ~H_QP_CR_ENABLED; + + hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, + 1), cb0, &dummy64, + &dummy64, &dummy16, &dummy16); + if (hret != H_SUCCESS) { + pr_err("modify_ehea_qp failed (1)\n"); + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), + cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (2)\n"); + goto out; + } + + /* deregister shared memory regions */ + dret = ehea_rem_smrs(pr); + if (dret) { + pr_err("unreg shared memory region failed\n"); + goto out; + } + } + + ret = 0; +out: + free_page((unsigned long)cb0); + + return ret; +} + +static void ehea_update_rqs(struct ehea_qp *orig_qp, struct ehea_port_res *pr) +{ + struct ehea_qp qp = *orig_qp; + struct ehea_qp_init_attr *init_attr = &qp.init_attr; + struct ehea_rwqe *rwqe; + struct sk_buff **skba_rq2 = pr->rq2_skba.arr; + struct sk_buff **skba_rq3 = pr->rq3_skba.arr; + struct sk_buff *skb; + u32 lkey = pr->recv_mr.lkey; + + + int i; + int index; + + for (i = 0; i < init_attr->act_nr_rwqes_rq2 + 1; i++) { + rwqe = ehea_get_next_rwqe(&qp, 2); + rwqe->sg_list[0].l_key = lkey; + index = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, rwqe->wr_id); + skb = skba_rq2[index]; + if (skb) + rwqe->sg_list[0].vaddr = ehea_map_vaddr(skb->data); + } + + for (i = 0; i < init_attr->act_nr_rwqes_rq3 + 1; i++) { + rwqe = ehea_get_next_rwqe(&qp, 3); + rwqe->sg_list[0].l_key = lkey; + index = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, rwqe->wr_id); + skb = skba_rq3[index]; + if (skb) + rwqe->sg_list[0].vaddr = ehea_map_vaddr(skb->data); + } +} + +static int ehea_restart_qps(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_adapter *adapter = 
port->adapter; + int ret = 0; + int i; + + struct hcp_modify_qp_cb0 *cb0; + u64 hret; + u64 dummy64 = 0; + u16 dummy16 = 0; + + cb0 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb0) + return -ENOMEM; + + for (i = 0; i < (port->num_def_qps); i++) { + struct ehea_port_res *pr = &port->port_res[i]; + struct ehea_qp *qp = pr->qp; + + ret = ehea_gen_smrs(pr); + if (ret) { + netdev_err(dev, "creation of shared memory regions failed\n"); + goto out; + } + + ehea_update_rqs(qp, pr); + + /* Enable queue pair */ + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), + cb0); + if (hret != H_SUCCESS) { + netdev_err(dev, "query_ehea_qp failed (1)\n"); + ret = -EFAULT; + goto out; + } + + cb0->qp_ctl_reg = (cb0->qp_ctl_reg & H_QP_CR_RES_STATE) << 8; + cb0->qp_ctl_reg |= H_QP_CR_ENABLED; + + hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, + 1), cb0, &dummy64, + &dummy64, &dummy16, &dummy16); + if (hret != H_SUCCESS) { + netdev_err(dev, "modify_ehea_qp failed (1)\n"); + ret = -EFAULT; + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), + cb0); + if (hret != H_SUCCESS) { + netdev_err(dev, "query_ehea_qp failed (2)\n"); + ret = -EFAULT; + goto out; + } + + /* refill entire queue */ + ehea_refill_rq1(pr, pr->rq1_skba.index, 0); + ehea_refill_rq2(pr, 0); + ehea_refill_rq3(pr, 0); + } +out: + free_page((unsigned long)cb0); + + return ret; +} + +static void ehea_reset_port(struct work_struct *work) +{ + int ret; + struct ehea_port *port = + container_of(work, struct ehea_port, reset_task); + struct net_device *dev = port->netdev; + + mutex_lock(&dlpar_mem_lock); + port->resets++; + mutex_lock(&port->port_lock); + netif_tx_disable(dev); + + port_napi_disable(port); + + ehea_down(dev); + + ret = ehea_up(dev); + if (ret) + goto out; + + ehea_set_multicast_list(dev); + + netif_info(port, timer, dev, "reset 
successful\n"); + + port_napi_enable(port); + + netif_tx_wake_all_queues(dev); +out: + mutex_unlock(&port->port_lock); + mutex_unlock(&dlpar_mem_lock); +} + +static void ehea_rereg_mrs(void) +{ + int ret, i; + struct ehea_adapter *adapter; + + pr_info("LPAR memory changed - re-initializing driver\n"); + + list_for_each_entry(adapter, &adapter_list, list) + if (adapter->active_ports) { + /* Shutdown all ports */ + for (i = 0; i < EHEA_MAX_PORTS; i++) { + struct ehea_port *port = adapter->port[i]; + struct net_device *dev; + + if (!port) + continue; + + dev = port->netdev; + + if (dev->flags & IFF_UP) { + mutex_lock(&port->port_lock); + netif_tx_disable(dev); + ehea_flush_sq(port); + ret = ehea_stop_qps(dev); + if (ret) { + mutex_unlock(&port->port_lock); + goto out; + } + port_napi_disable(port); + mutex_unlock(&port->port_lock); + } + reset_sq_restart_flag(port); + } + + /* Unregister old memory region */ + ret = ehea_rem_mr(&adapter->mr); + if (ret) { + pr_err("unregister MR failed - driver inoperable!\n"); + goto out; + } + } + + clear_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + + list_for_each_entry(adapter, &adapter_list, list) + if (adapter->active_ports) { + /* Register new memory region */ + ret = ehea_reg_kernel_mr(adapter, &adapter->mr); + if (ret) { + pr_err("register MR failed - driver inoperable!\n"); + goto out; + } + + /* Restart all ports */ + for (i = 0; i < EHEA_MAX_PORTS; i++) { + struct ehea_port *port = adapter->port[i]; + + if (port) { + struct net_device *dev = port->netdev; + + if (dev->flags & IFF_UP) { + mutex_lock(&port->port_lock); + ret = ehea_restart_qps(dev); + if (!ret) { + check_sqs(port); + port_napi_enable(port); + netif_tx_wake_all_queues(dev); + } else { + netdev_err(dev, "Unable to restart QPS\n"); + } + mutex_unlock(&port->port_lock); + } + } + } + } + pr_info("re-initializing driver complete\n"); +out: + return; +} + +static void ehea_tx_watchdog(struct net_device *dev, unsigned int txqueue) +{ + struct ehea_port *port = 
netdev_priv(dev); + + if (netif_carrier_ok(dev) && + !test_bit(__EHEA_STOP_XFER, &ehea_driver_flags)) + ehea_schedule_port_reset(port); +} + +static int ehea_sense_adapter_attr(struct ehea_adapter *adapter) +{ + struct hcp_query_ehea *cb; + u64 hret; + int ret; + + cb = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb) { + ret = -ENOMEM; + goto out; + } + + hret = ehea_h_query_ehea(adapter->handle, cb); + + if (hret != H_SUCCESS) { + ret = -EIO; + goto out_herr; + } + + adapter->max_mc_mac = cb->max_mc_mac - 1; + ret = 0; + +out_herr: + free_page((unsigned long)cb); +out: + return ret; +} + +static int ehea_get_jumboframe_status(struct ehea_port *port, int *jumbo) +{ + struct hcp_ehea_port_cb4 *cb4; + u64 hret; + int ret = 0; + + *jumbo = 0; + + /* (Try to) enable *jumbo frames */ + cb4 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb4) { + pr_err("no mem for cb4\n"); + ret = -ENOMEM; + goto out; + } else { + hret = ehea_h_query_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB4, + H_PORT_CB4_JUMBO, cb4); + if (hret == H_SUCCESS) { + if (cb4->jumbo_frame) + *jumbo = 1; + else { + cb4->jumbo_frame = 1; + hret = ehea_h_modify_ehea_port(port->adapter-> + handle, + port-> + logical_port_id, + H_PORT_CB4, + H_PORT_CB4_JUMBO, + cb4); + if (hret == H_SUCCESS) + *jumbo = 1; + } + } else + ret = -EINVAL; + + free_page((unsigned long)cb4); + } +out: + return ret; +} + +static ssize_t log_port_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ehea_port *port = container_of(dev, struct ehea_port, ofdev.dev); + return sprintf(buf, "%d", port->logical_port_id); +} + +static DEVICE_ATTR_RO(log_port_id); + +static void logical_port_release(struct device *dev) +{ + struct ehea_port *port = container_of(dev, struct ehea_port, ofdev.dev); + of_node_put(port->ofdev.dev.of_node); +} + +static struct device *ehea_register_port(struct ehea_port *port, + struct device_node *dn) +{ + int ret; + + port->ofdev.dev.of_node = of_node_get(dn); 
+ port->ofdev.dev.parent = &port->adapter->ofdev->dev; + port->ofdev.dev.bus = &ibmebus_bus_type; + + dev_set_name(&port->ofdev.dev, "port%d", port_name_cnt++); + port->ofdev.dev.release = logical_port_release; + + ret = of_device_register(&port->ofdev); + if (ret) { + pr_err("failed to register device. ret=%d\n", ret); + put_device(&port->ofdev.dev); + goto out; + } + + ret = device_create_file(&port->ofdev.dev, &dev_attr_log_port_id); + if (ret) { + pr_err("failed to register attributes, ret=%d\n", ret); + goto out_unreg_of_dev; + } + + return &port->ofdev.dev; + +out_unreg_of_dev: + of_device_unregister(&port->ofdev); +out: + return NULL; +}

/*
 * Undo ehea_register_port(): remove the log_port_id attribute and
 * unregister the port's OF device.
 */
static void ehea_unregister_port(struct ehea_port *port)
{
	device_remove_file(&port->ofdev.dev, &dev_attr_log_port_id);
	of_device_unregister(&port->ofdev);
}

/* net_device callbacks installed on every eHEA port netdev. */
static const struct net_device_ops ehea_netdev_ops = {
	.ndo_open		= ehea_open,
	.ndo_stop		= ehea_stop,
	.ndo_start_xmit		= ehea_start_xmit,
	.ndo_get_stats64	= ehea_get_stats64,
	.ndo_set_mac_address	= ehea_set_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_rx_mode	= ehea_set_multicast_list,
	.ndo_vlan_rx_add_vid	= ehea_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= ehea_vlan_rx_kill_vid,
	.ndo_tx_timeout		= ehea_tx_watchdog,
};

/*
 * Allocate, initialize and register the net_device for one logical port.
 *
 * @adapter:		adapter the port belongs to
 * @logical_port_id:	logical port number of the port
 * @dn:			device tree node handed to ehea_register_port()
 *
 * On success adapter->active_ports is incremented and the new port is
 * returned. On failure everything set up so far is released, the error is
 * logged and NULL is returned.
 */
static struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter,
						u32 logical_port_id,
						struct device_node *dn)
{
	int ret;
	struct net_device *dev;
	struct ehea_port *port;
	struct device *port_dev;
	/* Defined value for the log line below even if the query fails. */
	int jumbo = 0;

	/* allocate memory for the port structures */
	dev = alloc_etherdev_mq(sizeof(struct ehea_port), EHEA_MAX_PORT_RES);

	if (!dev) {
		ret = -ENOMEM;
		goto out_err;
	}

	port = netdev_priv(dev);

	mutex_init(&port->port_lock);
	port->state = EHEA_PORT_DOWN;
	port->sig_comp_iv = sq_entries / 10;

	port->adapter = adapter;
	port->netdev = dev;
	port->logical_port_id = logical_port_id;

	port->msg_enable = netif_msg_init(msg_level, EHEA_MSG_DEFAULT);

	port->mc_list = kzalloc(sizeof(struct ehea_mc_list), GFP_KERNEL);
	if (!port->mc_list) {
		ret = -ENOMEM;
		goto out_free_ethdev;
	}

	INIT_LIST_HEAD(&port->mc_list->list);

	ret = ehea_sense_port_attr(port);
	if (ret)
		goto out_free_mc_list;

	netif_set_real_num_rx_queues(dev, port->num_def_qps);
	netif_set_real_num_tx_queues(dev, port->num_def_qps);

	port_dev = ehea_register_port(port, dn);
	if (!port_dev) {
		/*
		 * ehea_register_port() only reports NULL; without this,
		 * ret would still be 0 and the failure would be logged
		 * as "ret=0".
		 */
		ret = -EIO;
		goto out_free_mc_list;
	}

	SET_NETDEV_DEV(dev, port_dev);

	/* initialize net_device structure */
	eth_hw_addr_set(dev, (u8 *)&port->mac_addr);

	dev->netdev_ops = &ehea_netdev_ops;
	ehea_set_ethtool_ops(dev);

	dev->hw_features = NETIF_F_SG | NETIF_F_TSO |
			   NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_CTAG_TX;
	dev->features = NETIF_F_SG | NETIF_F_TSO |
			NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
			NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXCSUM;
	dev->vlan_features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HIGHDMA |
			     NETIF_F_IP_CSUM;
	dev->watchdog_timeo = EHEA_WATCH_DOG_TIMEOUT;

	/* MTU range: 68 - 9022 */
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = EHEA_MAX_PACKET_SIZE;

	INIT_WORK(&port->reset_task, ehea_reset_port);
	INIT_DELAYED_WORK(&port->stats_work, ehea_update_stats);

	init_waitqueue_head(&port->swqe_avail_wq);
	init_waitqueue_head(&port->restart_wq);

	ret = register_netdev(dev);
	if (ret) {
		pr_err("register_netdev failed. ret=%d\n", ret);
		goto out_unreg_port;
	}

	/* A failed query is logged but does not fail port setup. */
	ret = ehea_get_jumboframe_status(port, &jumbo);
	if (ret)
		netdev_err(dev, "failed determining jumbo frame status\n");

	netdev_info(dev, "Jumbo frames are %sabled\n",
		    jumbo == 1 ? "en" : "dis");

	adapter->active_ports++;

	return port;

out_unreg_port:
	ehea_unregister_port(port);

out_free_mc_list:
	kfree(port->mc_list);

out_free_ethdev:
	free_netdev(dev);

out_err:
	pr_err("setting up logical port with id=%d failed, ret=%d\n",
	       logical_port_id, ret);
	return NULL;
}

/*
 * Reverse ehea_setup_single_port(): stop the port's deferred work,
 * unregister and free its netdev and drop the adapter's active port count.
 */
static void ehea_shutdown_single_port(struct ehea_port *port)
{
	struct ehea_adapter *adapter = port->adapter;

	cancel_work_sync(&port->reset_task);
	cancel_delayed_work_sync(&port->stats_work);
	unregister_netdev(port->netdev);
	ehea_unregister_port(port);
	kfree(port->mc_list);
	free_netdev(port->netdev);
	adapter->active_ports--;
}

/*
 * Set up one port for every child node of the adapter's device tree node
 * that carries an "ibm,hea-port-no" property.
 *
 * A port that fails to come up leaves a NULL entry in adapter->port[] and
 * does not abort the scan. Returns 0, or -EIO if the adapter MR could not
 * be created.
 */
static int ehea_setup_ports(struct ehea_adapter *adapter)
{
	struct device_node *lhea_dn;
	struct device_node *eth_dn = NULL;

	const u32 *dn_log_port_id;
	int i = 0;

	lhea_dn = adapter->ofdev->dev.of_node;
	while ((eth_dn = of_get_next_child(lhea_dn, eth_dn))) {

		dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no",
						 NULL);
		if (!dn_log_port_id) {
			pr_err("bad device node: eth_dn name=%pOF\n", eth_dn);
			continue;
		}

		/*
		 * adapter->port[] is walked with EHEA_MAX_PORTS as its bound
		 * everywhere else; never index past it. Leaving the loop
		 * early requires dropping the child reference, exactly as
		 * the -EIO return below does.
		 */
		if (i >= EHEA_MAX_PORTS) {
			pr_err("too many logical ports in device tree, ignoring the rest\n");
			of_node_put(eth_dn);
			break;
		}

		if (ehea_add_adapter_mr(adapter)) {
			pr_err("creating MR failed\n");
			of_node_put(eth_dn);
			return -EIO;
		}

		adapter->port[i] = ehea_setup_single_port(adapter,
							  *dn_log_port_id,
							  eth_dn);
		if (adapter->port[i])
			netdev_info(adapter->port[i]->netdev,
				    "logical port id #%d\n", *dn_log_port_id);
		else
			ehea_remove_adapter_mr(adapter);

		i++;
	}
	return 0;
}

/*
 * Find the child node of the adapter whose "ibm,hea-port-no" property
 * equals @logical_port_id.
 *
 * Returns the node as handed out by of_get_next_child() (callers in this
 * file release it with of_node_put()), or NULL if no child matches.
 */
static struct device_node *ehea_get_eth_dn(struct ehea_adapter *adapter,
					   u32 logical_port_id)
{
	struct device_node *lhea_dn;
	struct device_node *eth_dn = NULL;
	const u32 *dn_log_port_id;

	lhea_dn = adapter->ofdev->dev.of_node;
	while ((eth_dn = of_get_next_child(lhea_dn, eth_dn))) {

		dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no",
						 NULL);
		if (dn_log_port_id)
			if (*dn_log_port_id == logical_port_id)
				return eth_dn;
	}

	return NULL;
}

static ssize_t
probe_port_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ehea_adapter *adapter = dev_get_drvdata(dev); + struct ehea_port *port; + struct device_node *eth_dn = NULL; + int i; + + u32 logical_port_id; + + sscanf(buf, "%d", &logical_port_id); + + port = ehea_get_port(adapter, logical_port_id); + + if (port) { + netdev_info(port->netdev, "adding port with logical port id=%d failed: port already configured\n", + logical_port_id); + return -EINVAL; + } + + eth_dn = ehea_get_eth_dn(adapter, logical_port_id); + + if (!eth_dn) { + pr_info("no logical port with id %d found\n", logical_port_id); + return -EINVAL; + } + + if (ehea_add_adapter_mr(adapter)) { + pr_err("creating MR failed\n"); + of_node_put(eth_dn); + return -EIO; + } + + port = ehea_setup_single_port(adapter, logical_port_id, eth_dn); + + of_node_put(eth_dn); + + if (port) { + for (i = 0; i < EHEA_MAX_PORTS; i++) + if (!adapter->port[i]) { + adapter->port[i] = port; + break; + } + + netdev_info(port->netdev, "added: (logical port id=%d)\n", + logical_port_id); + } else { + ehea_remove_adapter_mr(adapter); + return -EIO; + } + + return (ssize_t) count; +} + +static ssize_t remove_port_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ehea_adapter *adapter = dev_get_drvdata(dev); + struct ehea_port *port; + int i; + u32 logical_port_id; + + sscanf(buf, "%d", &logical_port_id); + + port = ehea_get_port(adapter, logical_port_id); + + if (port) { + netdev_info(port->netdev, "removed: (logical port id=%d)\n", + logical_port_id); + + ehea_shutdown_single_port(port); + + for (i = 0; i < EHEA_MAX_PORTS; i++) + if (adapter->port[i] == port) { + adapter->port[i] = NULL; + break; + } + } else { + pr_err("removing port with logical port id=%d failed. 
port not configured.\n", + logical_port_id); + return -EINVAL; + } + + ehea_remove_adapter_mr(adapter); + + return (ssize_t) count; +} + +static DEVICE_ATTR_WO(probe_port); +static DEVICE_ATTR_WO(remove_port); + +static int ehea_create_device_sysfs(struct platform_device *dev) +{ + int ret = device_create_file(&dev->dev, &dev_attr_probe_port); + if (ret) + goto out; + + ret = device_create_file(&dev->dev, &dev_attr_remove_port); +out: + return ret; +} + +static void ehea_remove_device_sysfs(struct platform_device *dev) +{ + device_remove_file(&dev->dev, &dev_attr_probe_port); + device_remove_file(&dev->dev, &dev_attr_remove_port); +} + +static int ehea_reboot_notifier(struct notifier_block *nb, + unsigned long action, void *unused) +{ + if (action == SYS_RESTART) { + pr_info("Reboot: freeing all eHEA resources\n"); + ibmebus_unregister_driver(&ehea_driver); + } + return NOTIFY_DONE; +} + +static struct notifier_block ehea_reboot_nb = { + .notifier_call = ehea_reboot_notifier, +}; + +static int ehea_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + int ret = NOTIFY_BAD; + struct memory_notify *arg = data; + + mutex_lock(&dlpar_mem_lock); + + switch (action) { + case MEM_CANCEL_OFFLINE: + pr_info("memory offlining canceled"); + fallthrough; /* re-add canceled memory block */ + + case MEM_ONLINE: + pr_info("memory is going online"); + set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + if (ehea_add_sect_bmap(arg->start_pfn, arg->nr_pages)) + goto out_unlock; + ehea_rereg_mrs(); + break; + + case MEM_GOING_OFFLINE: + pr_info("memory is going offline"); + set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + if (ehea_rem_sect_bmap(arg->start_pfn, arg->nr_pages)) + goto out_unlock; + ehea_rereg_mrs(); + break; + + default: + break; + } + + ehea_update_firmware_handles(); + ret = NOTIFY_OK; + +out_unlock: + mutex_unlock(&dlpar_mem_lock); + return ret; +} + +static struct notifier_block ehea_mem_nb = { + .notifier_call = ehea_mem_notifier, +}; + 
+static void ehea_crash_handler(void) +{ + int i; + + if (ehea_fw_handles.arr) + for (i = 0; i < ehea_fw_handles.num_entries; i++) + ehea_h_free_resource(ehea_fw_handles.arr[i].adh, + ehea_fw_handles.arr[i].fwh, + FORCE_FREE); + + if (ehea_bcmc_regs.arr) + for (i = 0; i < ehea_bcmc_regs.num_entries; i++) + ehea_h_reg_dereg_bcmc(ehea_bcmc_regs.arr[i].adh, + ehea_bcmc_regs.arr[i].port_id, + ehea_bcmc_regs.arr[i].reg_type, + ehea_bcmc_regs.arr[i].macaddr, + 0, H_DEREG_BCMC); +} + +static atomic_t ehea_memory_hooks_registered; + +/* Register memory hooks on probe of first adapter */ +static int ehea_register_memory_hooks(void) +{ + int ret = 0; + + if (atomic_inc_return(&ehea_memory_hooks_registered) > 1) + return 0; + + ret = ehea_create_busmap(); + if (ret) { + pr_info("ehea_create_busmap failed\n"); + goto out; + } + + ret = register_reboot_notifier(&ehea_reboot_nb); + if (ret) { + pr_info("register_reboot_notifier failed\n"); + goto out; + } + + ret = register_memory_notifier(&ehea_mem_nb); + if (ret) { + pr_info("register_memory_notifier failed\n"); + goto out2; + } + + ret = crash_shutdown_register(ehea_crash_handler); + if (ret) { + pr_info("crash_shutdown_register failed\n"); + goto out3; + } + + return 0; + +out3: + unregister_memory_notifier(&ehea_mem_nb); +out2: + unregister_reboot_notifier(&ehea_reboot_nb); +out: + atomic_dec(&ehea_memory_hooks_registered); + return ret; +} + +static void ehea_unregister_memory_hooks(void) +{ + /* Only remove the hooks if we've registered them */ + if (atomic_read(&ehea_memory_hooks_registered) == 0) + return; + + unregister_reboot_notifier(&ehea_reboot_nb); + if (crash_shutdown_unregister(ehea_crash_handler)) + pr_info("failed unregistering crash handler\n"); + unregister_memory_notifier(&ehea_mem_nb); +} + +static int ehea_probe_adapter(struct platform_device *dev) +{ + struct ehea_adapter *adapter; + const u64 *adapter_handle; + int ret; + int i; + + ret = ehea_register_memory_hooks(); + if (ret) + return ret; + + if 
(!dev || !dev->dev.of_node) { + pr_err("Invalid ibmebus device probed\n"); + return -EINVAL; + } + + adapter = devm_kzalloc(&dev->dev, sizeof(*adapter), GFP_KERNEL); + if (!adapter) { + ret = -ENOMEM; + dev_err(&dev->dev, "no mem for ehea_adapter\n"); + goto out; + } + + list_add(&adapter->list, &adapter_list); + + adapter->ofdev = dev; + + adapter_handle = of_get_property(dev->dev.of_node, "ibm,hea-handle", + NULL); + if (adapter_handle) + adapter->handle = *adapter_handle; + + if (!adapter->handle) { + dev_err(&dev->dev, "failed getting handle for adapter" + " '%pOF'\n", dev->dev.of_node); + ret = -ENODEV; + goto out_free_ad; + } + + adapter->pd = EHEA_PD_ID; + + platform_set_drvdata(dev, adapter); + + + /* initialize adapter and ports */ + /* get adapter properties */ + ret = ehea_sense_adapter_attr(adapter); + if (ret) { + dev_err(&dev->dev, "sense_adapter_attr failed: %d\n", ret); + goto out_free_ad; + } + + adapter->neq = ehea_create_eq(adapter, + EHEA_NEQ, EHEA_MAX_ENTRIES_EQ, 1); + if (!adapter->neq) { + ret = -EIO; + dev_err(&dev->dev, "NEQ creation failed\n"); + goto out_free_ad; + } + + tasklet_setup(&adapter->neq_tasklet, ehea_neq_tasklet); + + ret = ehea_create_device_sysfs(dev); + if (ret) + goto out_kill_eq; + + ret = ehea_setup_ports(adapter); + if (ret) { + dev_err(&dev->dev, "setup_ports failed\n"); + goto out_rem_dev_sysfs; + } + + ret = ibmebus_request_irq(adapter->neq->attr.ist1, + ehea_interrupt_neq, 0, + "ehea_neq", adapter); + if (ret) { + dev_err(&dev->dev, "requesting NEQ IRQ failed\n"); + goto out_shutdown_ports; + } + + /* Handle any events that might be pending. 
*/ + tasklet_hi_schedule(&adapter->neq_tasklet); + + ret = 0; + goto out; + +out_shutdown_ports: + for (i = 0; i < EHEA_MAX_PORTS; i++) + if (adapter->port[i]) { + ehea_shutdown_single_port(adapter->port[i]); + adapter->port[i] = NULL; + } + +out_rem_dev_sysfs: + ehea_remove_device_sysfs(dev); + +out_kill_eq: + ehea_destroy_eq(adapter->neq); + +out_free_ad: + list_del(&adapter->list); + +out: + ehea_update_firmware_handles(); + + return ret; +} + +static int ehea_remove(struct platform_device *dev) +{ + struct ehea_adapter *adapter = platform_get_drvdata(dev); + int i; + + for (i = 0; i < EHEA_MAX_PORTS; i++) + if (adapter->port[i]) { + ehea_shutdown_single_port(adapter->port[i]); + adapter->port[i] = NULL; + } + + ehea_remove_device_sysfs(dev); + + ibmebus_free_irq(adapter->neq->attr.ist1, adapter); + tasklet_kill(&adapter->neq_tasklet); + + ehea_destroy_eq(adapter->neq); + ehea_remove_adapter_mr(adapter); + list_del(&adapter->list); + + ehea_update_firmware_handles(); + + return 0; +} + +static int check_module_parm(void) +{ + int ret = 0; + + if ((rq1_entries < EHEA_MIN_ENTRIES_QP) || + (rq1_entries > EHEA_MAX_ENTRIES_RQ1)) { + pr_info("Bad parameter: rq1_entries\n"); + ret = -EINVAL; + } + if ((rq2_entries < EHEA_MIN_ENTRIES_QP) || + (rq2_entries > EHEA_MAX_ENTRIES_RQ2)) { + pr_info("Bad parameter: rq2_entries\n"); + ret = -EINVAL; + } + if ((rq3_entries < EHEA_MIN_ENTRIES_QP) || + (rq3_entries > EHEA_MAX_ENTRIES_RQ3)) { + pr_info("Bad parameter: rq3_entries\n"); + ret = -EINVAL; + } + if ((sq_entries < EHEA_MIN_ENTRIES_QP) || + (sq_entries > EHEA_MAX_ENTRIES_SQ)) { + pr_info("Bad parameter: sq_entries\n"); + ret = -EINVAL; + } + + return ret; +} + +static ssize_t capabilities_show(struct device_driver *drv, char *buf) +{ + return sprintf(buf, "%d", EHEA_CAPABILITIES); +} + +static DRIVER_ATTR_RO(capabilities); + +static int __init ehea_module_init(void) +{ + int ret; + + pr_info("IBM eHEA ethernet device driver (Release %s)\n", DRV_VERSION); + + 
memset(&ehea_fw_handles, 0, sizeof(ehea_fw_handles)); + memset(&ehea_bcmc_regs, 0, sizeof(ehea_bcmc_regs)); + + mutex_init(&ehea_fw_handles.lock); + spin_lock_init(&ehea_bcmc_regs.lock); + + ret = check_module_parm(); + if (ret) + goto out; + + ret = ibmebus_register_driver(&ehea_driver); + if (ret) { + pr_err("failed registering eHEA device driver on ebus\n"); + goto out; + } + + ret = driver_create_file(&ehea_driver.driver, + &driver_attr_capabilities); + if (ret) { + pr_err("failed to register capabilities attribute, ret=%d\n", + ret); + goto out2; + } + + return ret; + +out2: + ibmebus_unregister_driver(&ehea_driver); +out: + return ret; +} + +static void __exit ehea_module_exit(void) +{ + driver_remove_file(&ehea_driver.driver, &driver_attr_capabilities); + ibmebus_unregister_driver(&ehea_driver); + ehea_unregister_memory_hooks(); + kfree(ehea_fw_handles.arr); + kfree(ehea_bcmc_regs.arr); + ehea_destroy_busmap(); +} + +module_init(ehea_module_init); +module_exit(ehea_module_exit); diff --git a/drivers/net/ethernet/ibm/ehea/ehea_phyp.c b/drivers/net/ethernet/ibm/ehea/ehea_phyp.c new file mode 100644 index 000000000..e63716e13 --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_phyp.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * linux/drivers/net/ethernet/ibm/ehea/ehea_phyp.c + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 
2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "ehea_phyp.h" + + +static inline u16 get_order_of_qentries(u16 queue_entries) +{ + u8 ld = 1; /* logarithmus dualis */ + while (((1U << ld) - 1) < queue_entries) + ld++; + return ld - 1; +} + +/* Defines for H_CALL H_ALLOC_RESOURCE */ +#define H_ALL_RES_TYPE_QP 1 +#define H_ALL_RES_TYPE_CQ 2 +#define H_ALL_RES_TYPE_EQ 3 +#define H_ALL_RES_TYPE_MR 5 +#define H_ALL_RES_TYPE_MW 6 + +static long ehea_plpar_hcall_norets(unsigned long opcode, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7) +{ + long ret; + int i, sleep_msecs; + + for (i = 0; i < 5; i++) { + ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, + arg5, arg6, arg7); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + pr_err("opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx\n", + opcode, ret, + arg1, arg2, arg3, arg4, arg5, arg6, arg7); + + return ret; + } + + return H_BUSY; +} + +static long ehea_plpar_hcall9(unsigned long opcode, + unsigned long *outs, /* array of 9 outputs */ + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7, + unsigned long arg8, + unsigned long arg9) +{ + long ret; + int i, sleep_msecs; + u8 cb_cat; + + for (i = 0; i < 5; i++) { + ret = plpar_hcall9(opcode, outs, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + cb_cat = EHEA_BMASK_GET(H_MEHEAPORT_CAT, arg2); + + if ((ret < H_SUCCESS) && !(((ret == 
H_AUTHORITY) + && (opcode == H_MODIFY_HEA_PORT)) + && (((cb_cat == H_PORT_CB4) && ((arg3 == H_PORT_CB4_JUMBO) + || (arg3 == H_PORT_CB4_SPEED))) || ((cb_cat == H_PORT_CB7) + && (arg3 == H_PORT_CB7_DUCQPN))))) + pr_err("opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx arg8=%lx" + " arg9=%lx" + " out1=%lx out2=%lx out3=%lx out4=%lx" + " out5=%lx out6=%lx out7=%lx out8=%lx" + " out9=%lx\n", + opcode, ret, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9, + outs[0], outs[1], outs[2], outs[3], outs[4], + outs[5], outs[6], outs[7], outs[8]); + return ret; + } + + return H_BUSY; +} + +u64 ehea_h_query_ehea_qp(const u64 adapter_handle, const u8 qp_category, + const u64 qp_handle, const u64 sel_mask, void *cb_addr) +{ + return ehea_plpar_hcall_norets(H_QUERY_HEA_QP, + adapter_handle, /* R4 */ + qp_category, /* R5 */ + qp_handle, /* R6 */ + sel_mask, /* R7 */ + __pa(cb_addr), /* R8 */ + 0, 0); +} + +/* input param R5 */ +#define H_ALL_RES_QP_EQPO EHEA_BMASK_IBM(9, 11) +#define H_ALL_RES_QP_QPP EHEA_BMASK_IBM(12, 12) +#define H_ALL_RES_QP_RQR EHEA_BMASK_IBM(13, 15) +#define H_ALL_RES_QP_EQEG EHEA_BMASK_IBM(16, 16) +#define H_ALL_RES_QP_LL_QP EHEA_BMASK_IBM(17, 17) +#define H_ALL_RES_QP_DMA128 EHEA_BMASK_IBM(19, 19) +#define H_ALL_RES_QP_HSM EHEA_BMASK_IBM(20, 21) +#define H_ALL_RES_QP_SIGT EHEA_BMASK_IBM(22, 23) +#define H_ALL_RES_QP_TENURE EHEA_BMASK_IBM(48, 55) +#define H_ALL_RES_QP_RES_TYP EHEA_BMASK_IBM(56, 63) + +/* input param R9 */ +#define H_ALL_RES_QP_TOKEN EHEA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_PD EHEA_BMASK_IBM(32, 63) + +/* input param R10 */ +#define H_ALL_RES_QP_MAX_SWQE EHEA_BMASK_IBM(4, 7) +#define H_ALL_RES_QP_MAX_R1WQE EHEA_BMASK_IBM(12, 15) +#define H_ALL_RES_QP_MAX_R2WQE EHEA_BMASK_IBM(20, 23) +#define H_ALL_RES_QP_MAX_R3WQE EHEA_BMASK_IBM(28, 31) +/* Max Send Scatter Gather Elements */ +#define H_ALL_RES_QP_MAX_SSGE EHEA_BMASK_IBM(37, 39) +#define H_ALL_RES_QP_MAX_R1SGE EHEA_BMASK_IBM(45, 47) +/* Max 
Receive SG Elements RQ1 */ +#define H_ALL_RES_QP_MAX_R2SGE EHEA_BMASK_IBM(53, 55) +#define H_ALL_RES_QP_MAX_R3SGE EHEA_BMASK_IBM(61, 63) + +/* input param R11 */ +#define H_ALL_RES_QP_SWQE_IDL EHEA_BMASK_IBM(0, 7) +/* max swqe immediate data length */ +#define H_ALL_RES_QP_PORT_NUM EHEA_BMASK_IBM(48, 63) + +/* input param R12 */ +#define H_ALL_RES_QP_TH_RQ2 EHEA_BMASK_IBM(0, 15) +/* Threshold RQ2 */ +#define H_ALL_RES_QP_TH_RQ3 EHEA_BMASK_IBM(16, 31) +/* Threshold RQ3 */ + +/* output param R6 */ +#define H_ALL_RES_QP_ACT_SWQE EHEA_BMASK_IBM(0, 15) +#define H_ALL_RES_QP_ACT_R1WQE EHEA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_ACT_R2WQE EHEA_BMASK_IBM(32, 47) +#define H_ALL_RES_QP_ACT_R3WQE EHEA_BMASK_IBM(48, 63) + +/* output param, R7 */ +#define H_ALL_RES_QP_ACT_SSGE EHEA_BMASK_IBM(0, 7) +#define H_ALL_RES_QP_ACT_R1SGE EHEA_BMASK_IBM(8, 15) +#define H_ALL_RES_QP_ACT_R2SGE EHEA_BMASK_IBM(16, 23) +#define H_ALL_RES_QP_ACT_R3SGE EHEA_BMASK_IBM(24, 31) +#define H_ALL_RES_QP_ACT_SWQE_IDL EHEA_BMASK_IBM(32, 39) + +/* output param R8,R9 */ +#define H_ALL_RES_QP_SIZE_SQ EHEA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_SIZE_RQ1 EHEA_BMASK_IBM(32, 63) +#define H_ALL_RES_QP_SIZE_RQ2 EHEA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_SIZE_RQ3 EHEA_BMASK_IBM(32, 63) + +/* output param R11,R12 */ +#define H_ALL_RES_QP_LIOBN_SQ EHEA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_LIOBN_RQ1 EHEA_BMASK_IBM(32, 63) +#define H_ALL_RES_QP_LIOBN_RQ2 EHEA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_LIOBN_RQ3 EHEA_BMASK_IBM(32, 63) + +u64 ehea_h_alloc_resource_qp(const u64 adapter_handle, + struct ehea_qp_init_attr *init_attr, const u32 pd, + u64 *qp_handle, struct h_epas *h_epas) +{ + u64 hret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + u64 allocate_controls = + EHEA_BMASK_SET(H_ALL_RES_QP_EQPO, init_attr->low_lat_rq1 ? 1 : 0) + | EHEA_BMASK_SET(H_ALL_RES_QP_QPP, 0) + | EHEA_BMASK_SET(H_ALL_RES_QP_RQR, 6) /* rq1 & rq2 & rq3 */ + | EHEA_BMASK_SET(H_ALL_RES_QP_EQEG, 0) /* EQE gen. 
disabled */ + | EHEA_BMASK_SET(H_ALL_RES_QP_LL_QP, init_attr->low_lat_rq1) + | EHEA_BMASK_SET(H_ALL_RES_QP_DMA128, 0) + | EHEA_BMASK_SET(H_ALL_RES_QP_HSM, 0) + | EHEA_BMASK_SET(H_ALL_RES_QP_SIGT, init_attr->signalingtype) + | EHEA_BMASK_SET(H_ALL_RES_QP_RES_TYP, H_ALL_RES_TYPE_QP); + + u64 r9_reg = EHEA_BMASK_SET(H_ALL_RES_QP_PD, pd) + | EHEA_BMASK_SET(H_ALL_RES_QP_TOKEN, init_attr->qp_token); + + u64 max_r10_reg = + EHEA_BMASK_SET(H_ALL_RES_QP_MAX_SWQE, + get_order_of_qentries(init_attr->max_nr_send_wqes)) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R1WQE, + get_order_of_qentries(init_attr->max_nr_rwqes_rq1)) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R2WQE, + get_order_of_qentries(init_attr->max_nr_rwqes_rq2)) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R3WQE, + get_order_of_qentries(init_attr->max_nr_rwqes_rq3)) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_SSGE, init_attr->wqe_size_enc_sq) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R1SGE, + init_attr->wqe_size_enc_rq1) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R2SGE, + init_attr->wqe_size_enc_rq2) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R3SGE, + init_attr->wqe_size_enc_rq3); + + u64 r11_in = + EHEA_BMASK_SET(H_ALL_RES_QP_SWQE_IDL, init_attr->swqe_imm_data_len) + | EHEA_BMASK_SET(H_ALL_RES_QP_PORT_NUM, init_attr->port_nr); + u64 threshold = + EHEA_BMASK_SET(H_ALL_RES_QP_TH_RQ2, init_attr->rq2_threshold) + | EHEA_BMASK_SET(H_ALL_RES_QP_TH_RQ3, init_attr->rq3_threshold); + + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + allocate_controls, /* R5 */ + init_attr->send_cq_handle, /* R6 */ + init_attr->recv_cq_handle, /* R7 */ + init_attr->aff_eq_handle, /* R8 */ + r9_reg, /* R9 */ + max_r10_reg, /* R10 */ + r11_in, /* R11 */ + threshold); /* R12 */ + + *qp_handle = outs[0]; + init_attr->qp_nr = (u32)outs[1]; + + init_attr->act_nr_send_wqes = + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_SWQE, outs[2]); + init_attr->act_nr_rwqes_rq1 = + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R1WQE, outs[2]); + init_attr->act_nr_rwqes_rq2 = + 
(u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R2WQE, outs[2]); + init_attr->act_nr_rwqes_rq3 = + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R3WQE, outs[2]); + + init_attr->act_wqe_size_enc_sq = init_attr->wqe_size_enc_sq; + init_attr->act_wqe_size_enc_rq1 = init_attr->wqe_size_enc_rq1; + init_attr->act_wqe_size_enc_rq2 = init_attr->wqe_size_enc_rq2; + init_attr->act_wqe_size_enc_rq3 = init_attr->wqe_size_enc_rq3; + + init_attr->nr_sq_pages = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_SQ, outs[4]); + init_attr->nr_rq1_pages = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ1, outs[4]); + init_attr->nr_rq2_pages = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ2, outs[5]); + init_attr->nr_rq3_pages = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ3, outs[5]); + + init_attr->liobn_sq = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_SQ, outs[7]); + init_attr->liobn_rq1 = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ1, outs[7]); + init_attr->liobn_rq2 = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ2, outs[8]); + init_attr->liobn_rq3 = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ3, outs[8]); + + if (!hret) + hcp_epas_ctor(h_epas, outs[6], outs[6]); + + return hret; +} + +u64 ehea_h_alloc_resource_cq(const u64 adapter_handle, + struct ehea_cq_attr *cq_attr, + u64 *cq_handle, struct h_epas *epas) +{ + u64 hret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + H_ALL_RES_TYPE_CQ, /* R5 */ + cq_attr->eq_handle, /* R6 */ + cq_attr->cq_token, /* R7 */ + cq_attr->max_nr_of_cqes, /* R8 */ + 0, 0, 0, 0); /* R9-R12 */ + + *cq_handle = outs[0]; + cq_attr->act_nr_of_cqes = outs[3]; + cq_attr->nr_pages = outs[4]; + + if (!hret) + hcp_epas_ctor(epas, outs[5], outs[6]); + + return hret; +} + +/* Defines for H_CALL H_ALLOC_RESOURCE */ +#define H_ALL_RES_TYPE_QP 1 +#define H_ALL_RES_TYPE_CQ 2 +#define H_ALL_RES_TYPE_EQ 3 +#define H_ALL_RES_TYPE_MR 5 +#define H_ALL_RES_TYPE_MW 6 + +/* input param R5 */ +#define H_ALL_RES_EQ_NEQ EHEA_BMASK_IBM(0, 0) 
+#define H_ALL_RES_EQ_NON_NEQ_ISN EHEA_BMASK_IBM(6, 7) +#define H_ALL_RES_EQ_INH_EQE_GEN EHEA_BMASK_IBM(16, 16) +#define H_ALL_RES_EQ_RES_TYPE EHEA_BMASK_IBM(56, 63) +/* input param R6 */ +#define H_ALL_RES_EQ_MAX_EQE EHEA_BMASK_IBM(32, 63) + +/* output param R6 */ +#define H_ALL_RES_EQ_LIOBN EHEA_BMASK_IBM(32, 63) + +/* output param R7 */ +#define H_ALL_RES_EQ_ACT_EQE EHEA_BMASK_IBM(32, 63) + +/* output param R8 */ +#define H_ALL_RES_EQ_ACT_PS EHEA_BMASK_IBM(32, 63) + +/* output param R9 */ +#define H_ALL_RES_EQ_ACT_EQ_IST_C EHEA_BMASK_IBM(30, 31) +#define H_ALL_RES_EQ_ACT_EQ_IST_1 EHEA_BMASK_IBM(40, 63) + +/* output param R10 */ +#define H_ALL_RES_EQ_ACT_EQ_IST_2 EHEA_BMASK_IBM(40, 63) + +/* output param R11 */ +#define H_ALL_RES_EQ_ACT_EQ_IST_3 EHEA_BMASK_IBM(40, 63) + +/* output param R12 */ +#define H_ALL_RES_EQ_ACT_EQ_IST_4 EHEA_BMASK_IBM(40, 63) + +u64 ehea_h_alloc_resource_eq(const u64 adapter_handle, + struct ehea_eq_attr *eq_attr, u64 *eq_handle) +{ + u64 hret, allocate_controls; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + /* resource type */ + allocate_controls = + EHEA_BMASK_SET(H_ALL_RES_EQ_RES_TYPE, H_ALL_RES_TYPE_EQ) + | EHEA_BMASK_SET(H_ALL_RES_EQ_NEQ, eq_attr->type ? 
1 : 0) + | EHEA_BMASK_SET(H_ALL_RES_EQ_INH_EQE_GEN, !eq_attr->eqe_gen) + | EHEA_BMASK_SET(H_ALL_RES_EQ_NON_NEQ_ISN, 1); + + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + allocate_controls, /* R5 */ + eq_attr->max_nr_of_eqes, /* R6 */ + 0, 0, 0, 0, 0, 0); /* R7-R10 */ + + *eq_handle = outs[0]; + eq_attr->act_nr_of_eqes = outs[3]; + eq_attr->nr_pages = outs[4]; + eq_attr->ist1 = outs[5]; + eq_attr->ist2 = outs[6]; + eq_attr->ist3 = outs[7]; + eq_attr->ist4 = outs[8]; + + return hret; +} + +u64 ehea_h_modify_ehea_qp(const u64 adapter_handle, const u8 cat, + const u64 qp_handle, const u64 sel_mask, + void *cb_addr, u64 *inv_attr_id, u64 *proc_mask, + u16 *out_swr, u16 *out_rwr) +{ + u64 hret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_MODIFY_HEA_QP, + outs, + adapter_handle, /* R4 */ + (u64) cat, /* R5 */ + qp_handle, /* R6 */ + sel_mask, /* R7 */ + __pa(cb_addr), /* R8 */ + 0, 0, 0, 0); /* R9-R12 */ + + *inv_attr_id = outs[0]; + *out_swr = outs[3]; + *out_rwr = outs[4]; + *proc_mask = outs[5]; + + return hret; +} + +u64 ehea_h_register_rpage(const u64 adapter_handle, const u8 pagesize, + const u8 queue_type, const u64 resource_handle, + const u64 log_pageaddr, u64 count) +{ + u64 reg_control; + + reg_control = EHEA_BMASK_SET(H_REG_RPAGE_PAGE_SIZE, pagesize) + | EHEA_BMASK_SET(H_REG_RPAGE_QT, queue_type); + + return ehea_plpar_hcall_norets(H_REGISTER_HEA_RPAGES, + adapter_handle, /* R4 */ + reg_control, /* R5 */ + resource_handle, /* R6 */ + log_pageaddr, /* R7 */ + count, /* R8 */ + 0, 0); /* R9-R10 */ +} + +u64 ehea_h_register_smr(const u64 adapter_handle, const u64 orig_mr_handle, + const u64 vaddr_in, const u32 access_ctrl, const u32 pd, + struct ehea_mr *mr) +{ + u64 hret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_REGISTER_SMR, + outs, + adapter_handle , /* R4 */ + orig_mr_handle, /* R5 */ + vaddr_in, /* R6 */ + (((u64)access_ctrl) << 32ULL), /* R7 */ + pd, /* R8 
*/ + 0, 0, 0, 0); /* R9-R12 */ + + mr->handle = outs[0]; + mr->lkey = (u32)outs[2]; + + return hret; +} + +u64 ehea_h_disable_and_get_hea(const u64 adapter_handle, const u64 qp_handle) +{ + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + return ehea_plpar_hcall9(H_DISABLE_AND_GET_HEA, + outs, + adapter_handle, /* R4 */ + H_DISABLE_GET_EHEA_WQE_P, /* R5 */ + qp_handle, /* R6 */ + 0, 0, 0, 0, 0, 0); /* R7-R12 */ +} + +u64 ehea_h_free_resource(const u64 adapter_handle, const u64 res_handle, + u64 force_bit) +{ + return ehea_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle, /* R4 */ + res_handle, /* R5 */ + force_bit, + 0, 0, 0, 0); /* R7-R10 */ +} + +u64 ehea_h_alloc_resource_mr(const u64 adapter_handle, const u64 vaddr, + const u64 length, const u32 access_ctrl, + const u32 pd, u64 *mr_handle, u32 *lkey) +{ + u64 hret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + 5, /* R5 */ + vaddr, /* R6 */ + length, /* R7 */ + (((u64) access_ctrl) << 32ULL), /* R8 */ + pd, /* R9 */ + 0, 0, 0); /* R10-R12 */ + + *mr_handle = outs[0]; + *lkey = (u32)outs[2]; + return hret; +} + +u64 ehea_h_register_rpage_mr(const u64 adapter_handle, const u64 mr_handle, + const u8 pagesize, const u8 queue_type, + const u64 log_pageaddr, const u64 count) +{ + if ((count > 1) && (log_pageaddr & ~PAGE_MASK)) { + pr_err("not on pageboundary\n"); + return H_PARAMETER; + } + + return ehea_h_register_rpage(adapter_handle, pagesize, + queue_type, mr_handle, + log_pageaddr, count); +} + +u64 ehea_h_query_ehea(const u64 adapter_handle, void *cb_addr) +{ + u64 hret, cb_logaddr; + + cb_logaddr = __pa(cb_addr); + + hret = ehea_plpar_hcall_norets(H_QUERY_HEA, + adapter_handle, /* R4 */ + cb_logaddr, /* R5 */ + 0, 0, 0, 0, 0); /* R6-R10 */ +#ifdef DEBUG + ehea_dump(cb_addr, sizeof(struct hcp_query_ehea), "hcp_query_ehea"); +#endif + return hret; +} + +u64 ehea_h_query_ehea_port(const u64 adapter_handle, const u16 port_num, + 
const u8 cb_cat, const u64 select_mask, + void *cb_addr) +{ + u64 port_info; + u64 cb_logaddr = __pa(cb_addr); + u64 arr_index = 0; + + port_info = EHEA_BMASK_SET(H_MEHEAPORT_CAT, cb_cat) + | EHEA_BMASK_SET(H_MEHEAPORT_PN, port_num); + + return ehea_plpar_hcall_norets(H_QUERY_HEA_PORT, + adapter_handle, /* R4 */ + port_info, /* R5 */ + select_mask, /* R6 */ + arr_index, /* R7 */ + cb_logaddr, /* R8 */ + 0, 0); /* R9-R10 */ +} + +u64 ehea_h_modify_ehea_port(const u64 adapter_handle, const u16 port_num, + const u8 cb_cat, const u64 select_mask, + void *cb_addr) +{ + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + u64 port_info; + u64 arr_index = 0; + u64 cb_logaddr = __pa(cb_addr); + + port_info = EHEA_BMASK_SET(H_MEHEAPORT_CAT, cb_cat) + | EHEA_BMASK_SET(H_MEHEAPORT_PN, port_num); +#ifdef DEBUG + ehea_dump(cb_addr, sizeof(struct hcp_ehea_port_cb0), "Before HCALL"); +#endif + return ehea_plpar_hcall9(H_MODIFY_HEA_PORT, + outs, + adapter_handle, /* R4 */ + port_info, /* R5 */ + select_mask, /* R6 */ + arr_index, /* R7 */ + cb_logaddr, /* R8 */ + 0, 0, 0, 0); /* R9-R12 */ +} + +u64 ehea_h_reg_dereg_bcmc(const u64 adapter_handle, const u16 port_num, + const u8 reg_type, const u64 mc_mac_addr, + const u16 vlan_id, const u32 hcall_id) +{ + u64 r5_port_num, r6_reg_type, r7_mc_mac_addr, r8_vlan_id; + u64 mac_addr = mc_mac_addr >> 16; + + r5_port_num = EHEA_BMASK_SET(H_REGBCMC_PN, port_num); + r6_reg_type = EHEA_BMASK_SET(H_REGBCMC_REGTYPE, reg_type); + r7_mc_mac_addr = EHEA_BMASK_SET(H_REGBCMC_MACADDR, mac_addr); + r8_vlan_id = EHEA_BMASK_SET(H_REGBCMC_VLANID, vlan_id); + + return ehea_plpar_hcall_norets(hcall_id, + adapter_handle, /* R4 */ + r5_port_num, /* R5 */ + r6_reg_type, /* R6 */ + r7_mc_mac_addr, /* R7 */ + r8_vlan_id, /* R8 */ + 0, 0); /* R9-R12 */ +} + +u64 ehea_h_reset_events(const u64 adapter_handle, const u64 neq_handle, + const u64 event_mask) +{ + return ehea_plpar_hcall_norets(H_RESET_EVENTS, + adapter_handle, /* R4 */ + neq_handle, /* R5 */ + event_mask, /* R6 
*/ + 0, 0, 0, 0); /* R7-R12 */ +} + +u64 ehea_h_error_data(const u64 adapter_handle, const u64 ressource_handle, + void *rblock) +{ + return ehea_plpar_hcall_norets(H_ERROR_DATA, + adapter_handle, /* R4 */ + ressource_handle, /* R5 */ + __pa(rblock), /* R6 */ + 0, 0, 0, 0); /* R7-R12 */ +} diff --git a/drivers/net/ethernet/ibm/ehea/ehea_phyp.h b/drivers/net/ethernet/ibm/ehea/ehea_phyp.h new file mode 100644 index 000000000..e8b56c103 --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_phyp.h @@ -0,0 +1,433 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/drivers/net/ethernet/ibm/ehea/ehea_phyp.h + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + */ + +#ifndef __EHEA_PHYP_H__ +#define __EHEA_PHYP_H__ + +#include <linux/delay.h> +#include <asm/hvcall.h> +#include "ehea.h" +#include "ehea_hw.h" + +/* Some abbreviations used here: + * + * hcp_* - structures, variables and functions releated to Hypervisor Calls + */ + +/* Number of pages which can be registered at once by H_REGISTER_HEA_RPAGES */ +#define EHEA_MAX_RPAGE 512 + +/* Notification Event Queue (NEQ) Entry bit masks */ +#define NEQE_EVENT_CODE EHEA_BMASK_IBM(2, 7) +#define NEQE_PORTNUM EHEA_BMASK_IBM(32, 47) +#define NEQE_PORT_UP EHEA_BMASK_IBM(16, 16) +#define NEQE_EXTSWITCH_PORT_UP EHEA_BMASK_IBM(17, 17) +#define NEQE_EXTSWITCH_PRIMARY EHEA_BMASK_IBM(18, 18) +#define NEQE_PLID EHEA_BMASK_IBM(16, 47) + +/* Notification Event Codes */ +#define EHEA_EC_PORTSTATE_CHG 0x30 +#define EHEA_EC_ADAPTER_MALFUNC 0x32 +#define EHEA_EC_PORT_MALFUNC 0x33 + +/* Notification Event Log Register (NELR) bit masks */ +#define NELR_PORT_MALFUNC EHEA_BMASK_IBM(61, 61) +#define NELR_ADAPTER_MALFUNC EHEA_BMASK_IBM(62, 62) +#define NELR_PORTSTATE_CHG EHEA_BMASK_IBM(63, 63) + +static inline void hcp_epas_ctor(struct h_epas *epas, 
u64 paddr_kernel, + u64 paddr_user) +{ + /* To support 64k pages we must round to 64k page boundary */ + epas->kernel.addr = ioremap((paddr_kernel & PAGE_MASK), PAGE_SIZE) + + (paddr_kernel & ~PAGE_MASK); + epas->user.addr = paddr_user; +} + +static inline void hcp_epas_dtor(struct h_epas *epas) +{ + if (epas->kernel.addr) + iounmap((void __iomem *)((u64)epas->kernel.addr & PAGE_MASK)); + + epas->user.addr = 0; + epas->kernel.addr = 0; +} + +struct hcp_modify_qp_cb0 { + u64 qp_ctl_reg; /* 00 */ + u32 max_swqe; /* 02 */ + u32 max_rwqe; /* 03 */ + u32 port_nb; /* 04 */ + u32 reserved0; /* 05 */ + u64 qp_aer; /* 06 */ + u64 qp_tenure; /* 08 */ +}; + +/* Hcall Query/Modify Queue Pair Control Block 0 Selection Mask Bits */ +#define H_QPCB0_ALL EHEA_BMASK_IBM(0, 5) +#define H_QPCB0_QP_CTL_REG EHEA_BMASK_IBM(0, 0) +#define H_QPCB0_MAX_SWQE EHEA_BMASK_IBM(1, 1) +#define H_QPCB0_MAX_RWQE EHEA_BMASK_IBM(2, 2) +#define H_QPCB0_PORT_NB EHEA_BMASK_IBM(3, 3) +#define H_QPCB0_QP_AER EHEA_BMASK_IBM(4, 4) +#define H_QPCB0_QP_TENURE EHEA_BMASK_IBM(5, 5) + +/* Queue Pair Control Register Status Bits */ +#define H_QP_CR_ENABLED 0x8000000000000000ULL /* QP enabled */ + /* QP States: */ +#define H_QP_CR_STATE_RESET 0x0000010000000000ULL /* Reset */ +#define H_QP_CR_STATE_INITIALIZED 0x0000020000000000ULL /* Initialized */ +#define H_QP_CR_STATE_RDY2RCV 0x0000030000000000ULL /* Ready to recv */ +#define H_QP_CR_STATE_RDY2SND 0x0000050000000000ULL /* Ready to send */ +#define H_QP_CR_STATE_ERROR 0x0000800000000000ULL /* Error */ +#define H_QP_CR_RES_STATE 0x0000007F00000000ULL /* Resultant state */ + +struct hcp_modify_qp_cb1 { + u32 qpn; /* 00 */ + u32 qp_asyn_ev_eq_nb; /* 01 */ + u64 sq_cq_handle; /* 02 */ + u64 rq_cq_handle; /* 04 */ + /* sgel = scatter gather element */ + u32 sgel_nb_sq; /* 06 */ + u32 sgel_nb_rq1; /* 07 */ + u32 sgel_nb_rq2; /* 08 */ + u32 sgel_nb_rq3; /* 09 */ +}; + +/* Hcall Query/Modify Queue Pair Control Block 1 Selection Mask Bits */ +#define H_QPCB1_ALL 
EHEA_BMASK_IBM(0, 7) +#define H_QPCB1_QPN EHEA_BMASK_IBM(0, 0) +#define H_QPCB1_ASYN_EV_EQ_NB EHEA_BMASK_IBM(1, 1) +#define H_QPCB1_SQ_CQ_HANDLE EHEA_BMASK_IBM(2, 2) +#define H_QPCB1_RQ_CQ_HANDLE EHEA_BMASK_IBM(3, 3) +#define H_QPCB1_SGEL_NB_SQ EHEA_BMASK_IBM(4, 4) +#define H_QPCB1_SGEL_NB_RQ1 EHEA_BMASK_IBM(5, 5) +#define H_QPCB1_SGEL_NB_RQ2 EHEA_BMASK_IBM(6, 6) +#define H_QPCB1_SGEL_NB_RQ3 EHEA_BMASK_IBM(7, 7) + +struct hcp_query_ehea { + u32 cur_num_qps; /* 00 */ + u32 cur_num_cqs; /* 01 */ + u32 cur_num_eqs; /* 02 */ + u32 cur_num_mrs; /* 03 */ + u32 auth_level; /* 04 */ + u32 max_num_qps; /* 05 */ + u32 max_num_cqs; /* 06 */ + u32 max_num_eqs; /* 07 */ + u32 max_num_mrs; /* 08 */ + u32 reserved0; /* 09 */ + u32 int_clock_freq; /* 10 */ + u32 max_num_pds; /* 11 */ + u32 max_num_addr_handles; /* 12 */ + u32 max_num_cqes; /* 13 */ + u32 max_num_wqes; /* 14 */ + u32 max_num_sgel_rq1wqe; /* 15 */ + u32 max_num_sgel_rq2wqe; /* 16 */ + u32 max_num_sgel_rq3wqe; /* 17 */ + u32 mr_page_size; /* 18 */ + u32 reserved1; /* 19 */ + u64 max_mr_size; /* 20 */ + u64 reserved2; /* 22 */ + u32 num_ports; /* 24 */ + u32 reserved3; /* 25 */ + u32 reserved4; /* 26 */ + u32 reserved5; /* 27 */ + u64 max_mc_mac; /* 28 */ + u64 ehea_cap; /* 30 */ + u32 max_isn_per_eq; /* 32 */ + u32 max_num_neq; /* 33 */ + u64 max_num_vlan_ids; /* 34 */ + u32 max_num_port_group; /* 36 */ + u32 max_num_phys_port; /* 37 */ + +}; + +/* Hcall Query/Modify Port Control Block defines */ +#define H_PORT_CB0 0 +#define H_PORT_CB1 1 +#define H_PORT_CB2 2 +#define H_PORT_CB3 3 +#define H_PORT_CB4 4 +#define H_PORT_CB5 5 +#define H_PORT_CB6 6 +#define H_PORT_CB7 7 + +struct hcp_ehea_port_cb0 { + u64 port_mac_addr; + u64 port_rc; + u64 reserved0; + u32 port_op_state; + u32 port_speed; + u32 ext_swport_op_state; + u32 neg_tpf_prpf; + u32 num_default_qps; + u32 reserved1; + u64 default_qpn_arr[16]; +}; + +/* Hcall Query/Modify Port Control Block 0 Selection Mask Bits */ +#define H_PORT_CB0_ALL EHEA_BMASK_IBM(0, 7) 
/* Set all bits */ +#define H_PORT_CB0_MAC EHEA_BMASK_IBM(0, 0) /* MAC address */ +#define H_PORT_CB0_PRC EHEA_BMASK_IBM(1, 1) /* Port Recv Control */ +#define H_PORT_CB0_DEFQPNARRAY EHEA_BMASK_IBM(7, 7) /* Default QPN Array */ + +/* Hcall Query Port: Returned port speed values */ +#define H_SPEED_10M_H 1 /* 10 Mbps, Half Duplex */ +#define H_SPEED_10M_F 2 /* 10 Mbps, Full Duplex */ +#define H_SPEED_100M_H 3 /* 100 Mbps, Half Duplex */ +#define H_SPEED_100M_F 4 /* 100 Mbps, Full Duplex */ +#define H_SPEED_1G_F 6 /* 1 Gbps, Full Duplex */ +#define H_SPEED_10G_F 8 /* 10 Gbps, Full Duplex */ + +/* Port Receive Control Status Bits */ +#define PXLY_RC_VALID EHEA_BMASK_IBM(49, 49) +#define PXLY_RC_VLAN_XTRACT EHEA_BMASK_IBM(50, 50) +#define PXLY_RC_TCP_6_TUPLE EHEA_BMASK_IBM(51, 51) +#define PXLY_RC_UDP_6_TUPLE EHEA_BMASK_IBM(52, 52) +#define PXLY_RC_TCP_3_TUPLE EHEA_BMASK_IBM(53, 53) +#define PXLY_RC_TCP_2_TUPLE EHEA_BMASK_IBM(54, 54) +#define PXLY_RC_LLC_SNAP EHEA_BMASK_IBM(55, 55) +#define PXLY_RC_JUMBO_FRAME EHEA_BMASK_IBM(56, 56) +#define PXLY_RC_FRAG_IP_PKT EHEA_BMASK_IBM(57, 57) +#define PXLY_RC_TCP_UDP_CHKSUM EHEA_BMASK_IBM(58, 58) +#define PXLY_RC_IP_CHKSUM EHEA_BMASK_IBM(59, 59) +#define PXLY_RC_MAC_FILTER EHEA_BMASK_IBM(60, 60) +#define PXLY_RC_UNTAG_FILTER EHEA_BMASK_IBM(61, 61) +#define PXLY_RC_VLAN_TAG_FILTER EHEA_BMASK_IBM(62, 63) + +#define PXLY_RC_VLAN_FILTER 2 +#define PXLY_RC_VLAN_PERM 0 + + +#define H_PORT_CB1_ALL 0x8000000000000000ULL + +struct hcp_ehea_port_cb1 { + u64 vlan_filter[64]; +}; + +#define H_PORT_CB2_ALL 0xFFE0000000000000ULL + +struct hcp_ehea_port_cb2 { + u64 rxo; + u64 rxucp; + u64 rxufd; + u64 rxuerr; + u64 rxftl; + u64 rxmcp; + u64 rxbcp; + u64 txo; + u64 txucp; + u64 txmcp; + u64 txbcp; +}; + +struct hcp_ehea_port_cb3 { + u64 vlan_bc_filter[64]; + u64 vlan_mc_filter[64]; + u64 vlan_un_filter[64]; + u64 port_mac_hash_array[64]; +}; + +#define H_PORT_CB4_ALL 0xF000000000000000ULL +#define H_PORT_CB4_JUMBO 0x1000000000000000ULL 
+#define H_PORT_CB4_SPEED 0x8000000000000000ULL + +struct hcp_ehea_port_cb4 { + u32 port_speed; + u32 pause_frame; + u32 ens_port_op_state; + u32 jumbo_frame; + u32 ens_port_wrap; +}; + +/* Hcall Query/Modify Port Control Block 5 Selection Mask Bits */ +#define H_PORT_CB5_RCU 0x0001000000000000ULL +#define PXS_RCU EHEA_BMASK_IBM(61, 63) + +struct hcp_ehea_port_cb5 { + u64 prc; /* 00 */ + u64 uaa; /* 01 */ + u64 macvc; /* 02 */ + u64 xpcsc; /* 03 */ + u64 xpcsp; /* 04 */ + u64 pcsid; /* 05 */ + u64 xpcsst; /* 06 */ + u64 pthlb; /* 07 */ + u64 pthrb; /* 08 */ + u64 pqu; /* 09 */ + u64 pqd; /* 10 */ + u64 prt; /* 11 */ + u64 wsth; /* 12 */ + u64 rcb; /* 13 */ + u64 rcm; /* 14 */ + u64 rcu; /* 15 */ + u64 macc; /* 16 */ + u64 pc; /* 17 */ + u64 pst; /* 18 */ + u64 ducqpn; /* 19 */ + u64 mcqpn; /* 20 */ + u64 mma; /* 21 */ + u64 pmc0h; /* 22 */ + u64 pmc0l; /* 23 */ + u64 lbc; /* 24 */ +}; + +#define H_PORT_CB6_ALL 0xFFFFFE7FFFFF8000ULL + +struct hcp_ehea_port_cb6 { + u64 rxo; /* 00 */ + u64 rx64; /* 01 */ + u64 rx65; /* 02 */ + u64 rx128; /* 03 */ + u64 rx256; /* 04 */ + u64 rx512; /* 05 */ + u64 rx1024; /* 06 */ + u64 rxbfcs; /* 07 */ + u64 rxime; /* 08 */ + u64 rxrle; /* 09 */ + u64 rxorle; /* 10 */ + u64 rxftl; /* 11 */ + u64 rxjab; /* 12 */ + u64 rxse; /* 13 */ + u64 rxce; /* 14 */ + u64 rxrf; /* 15 */ + u64 rxfrag; /* 16 */ + u64 rxuoc; /* 17 */ + u64 rxcpf; /* 18 */ + u64 rxsb; /* 19 */ + u64 rxfd; /* 20 */ + u64 rxoerr; /* 21 */ + u64 rxaln; /* 22 */ + u64 ducqpn; /* 23 */ + u64 reserved0; /* 24 */ + u64 rxmcp; /* 25 */ + u64 rxbcp; /* 26 */ + u64 txmcp; /* 27 */ + u64 txbcp; /* 28 */ + u64 txo; /* 29 */ + u64 tx64; /* 30 */ + u64 tx65; /* 31 */ + u64 tx128; /* 32 */ + u64 tx256; /* 33 */ + u64 tx512; /* 34 */ + u64 tx1024; /* 35 */ + u64 txbfcs; /* 36 */ + u64 txcpf; /* 37 */ + u64 txlf; /* 38 */ + u64 txrf; /* 39 */ + u64 txime; /* 40 */ + u64 txsc; /* 41 */ + u64 txmc; /* 42 */ + u64 txsqe; /* 43 */ + u64 txdef; /* 44 */ + u64 txlcol; /* 45 */ + u64 txexcol; 
/* 46 */ + u64 txcse; /* 47 */ + u64 txbor; /* 48 */ +}; + +#define H_PORT_CB7_DUCQPN 0x8000000000000000ULL + +struct hcp_ehea_port_cb7 { + u64 def_uc_qpn; +}; + +u64 ehea_h_query_ehea_qp(const u64 adapter_handle, + const u8 qp_category, + const u64 qp_handle, const u64 sel_mask, + void *cb_addr); + +u64 ehea_h_modify_ehea_qp(const u64 adapter_handle, + const u8 cat, + const u64 qp_handle, + const u64 sel_mask, + void *cb_addr, + u64 *inv_attr_id, + u64 *proc_mask, u16 *out_swr, u16 *out_rwr); + +u64 ehea_h_alloc_resource_eq(const u64 adapter_handle, + struct ehea_eq_attr *eq_attr, u64 *eq_handle); + +u64 ehea_h_alloc_resource_cq(const u64 adapter_handle, + struct ehea_cq_attr *cq_attr, + u64 *cq_handle, struct h_epas *epas); + +u64 ehea_h_alloc_resource_qp(const u64 adapter_handle, + struct ehea_qp_init_attr *init_attr, + const u32 pd, + u64 *qp_handle, struct h_epas *h_epas); + +#define H_REG_RPAGE_PAGE_SIZE EHEA_BMASK_IBM(48, 55) +#define H_REG_RPAGE_QT EHEA_BMASK_IBM(62, 63) + +u64 ehea_h_register_rpage(const u64 adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 log_pageaddr, u64 count); + +#define H_DISABLE_GET_EHEA_WQE_P 1 +#define H_DISABLE_GET_SQ_WQE_P 2 +#define H_DISABLE_GET_RQC 3 + +u64 ehea_h_disable_and_get_hea(const u64 adapter_handle, const u64 qp_handle); + +#define FORCE_FREE 1 +#define NORMAL_FREE 0 + +u64 ehea_h_free_resource(const u64 adapter_handle, const u64 res_handle, + u64 force_bit); + +u64 ehea_h_alloc_resource_mr(const u64 adapter_handle, const u64 vaddr, + const u64 length, const u32 access_ctrl, + const u32 pd, u64 *mr_handle, u32 *lkey); + +u64 ehea_h_register_rpage_mr(const u64 adapter_handle, const u64 mr_handle, + const u8 pagesize, const u8 queue_type, + const u64 log_pageaddr, const u64 count); + +u64 ehea_h_register_smr(const u64 adapter_handle, const u64 orig_mr_handle, + const u64 vaddr_in, const u32 access_ctrl, const u32 pd, + struct ehea_mr *mr); + +u64 
ehea_h_query_ehea(const u64 adapter_handle, void *cb_addr); + +/* output param R5 */ +#define H_MEHEAPORT_CAT EHEA_BMASK_IBM(40, 47) +#define H_MEHEAPORT_PN EHEA_BMASK_IBM(48, 63) + +u64 ehea_h_query_ehea_port(const u64 adapter_handle, const u16 port_num, + const u8 cb_cat, const u64 select_mask, + void *cb_addr); + +u64 ehea_h_modify_ehea_port(const u64 adapter_handle, const u16 port_num, + const u8 cb_cat, const u64 select_mask, + void *cb_addr); + +#define H_REGBCMC_PN EHEA_BMASK_IBM(48, 63) +#define H_REGBCMC_REGTYPE EHEA_BMASK_IBM(60, 63) +#define H_REGBCMC_MACADDR EHEA_BMASK_IBM(16, 63) +#define H_REGBCMC_VLANID EHEA_BMASK_IBM(52, 63) + +u64 ehea_h_reg_dereg_bcmc(const u64 adapter_handle, const u16 port_num, + const u8 reg_type, const u64 mc_mac_addr, + const u16 vlan_id, const u32 hcall_id); + +u64 ehea_h_reset_events(const u64 adapter_handle, const u64 neq_handle, + const u64 event_mask); + +u64 ehea_h_error_data(const u64 adapter_handle, const u64 ressource_handle, + void *rblock); + +#endif /* __EHEA_PHYP_H__ */ diff --git a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c new file mode 100644 index 000000000..db45373ea --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c @@ -0,0 +1,999 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * linux/drivers/net/ethernet/ibm/ehea/ehea_qmr.c + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 
2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/mm.h> +#include <linux/slab.h> +#include "ehea.h" +#include "ehea_phyp.h" +#include "ehea_qmr.h" + +static struct ehea_bmap *ehea_bmap; + +static void *hw_qpageit_get_inc(struct hw_queue *queue) +{ + void *retvalue = hw_qeit_get(queue); + + queue->current_q_offset += queue->pagesize; + if (queue->current_q_offset > queue->queue_length) { + queue->current_q_offset -= queue->pagesize; + retvalue = NULL; + } else if (((u64) retvalue) & (EHEA_PAGESIZE-1)) { + pr_err("not on pageboundary\n"); + retvalue = NULL; + } + return retvalue; +} + +static int hw_queue_ctor(struct hw_queue *queue, const u32 nr_of_pages, + const u32 pagesize, const u32 qe_size) +{ + int pages_per_kpage = PAGE_SIZE / pagesize; + int i, k; + + if ((pagesize > PAGE_SIZE) || (!pages_per_kpage)) { + pr_err("pagesize conflict! 
kernel pagesize=%d, ehea pagesize=%d\n", + (int)PAGE_SIZE, (int)pagesize); + return -EINVAL; + } + + queue->queue_length = nr_of_pages * pagesize; + queue->queue_pages = kmalloc_array(nr_of_pages, sizeof(void *), + GFP_KERNEL); + if (!queue->queue_pages) + return -ENOMEM; + + /* + * allocate pages for queue: + * outer loop allocates whole kernel pages (page aligned) and + * inner loop divides a kernel page into smaller hea queue pages + */ + i = 0; + while (i < nr_of_pages) { + u8 *kpage = (u8 *)get_zeroed_page(GFP_KERNEL); + if (!kpage) + goto out_nomem; + for (k = 0; k < pages_per_kpage && i < nr_of_pages; k++) { + (queue->queue_pages)[i] = (struct ehea_page *)kpage; + kpage += pagesize; + i++; + } + } + + queue->current_q_offset = 0; + queue->qe_size = qe_size; + queue->pagesize = pagesize; + queue->toggle_state = 1; + + return 0; +out_nomem: + for (i = 0; i < nr_of_pages; i += pages_per_kpage) { + if (!(queue->queue_pages)[i]) + break; + free_page((unsigned long)(queue->queue_pages)[i]); + } + return -ENOMEM; +} + +static void hw_queue_dtor(struct hw_queue *queue) +{ + int pages_per_kpage; + int i, nr_pages; + + if (!queue || !queue->queue_pages) + return; + + pages_per_kpage = PAGE_SIZE / queue->pagesize; + + nr_pages = queue->queue_length / queue->pagesize; + + for (i = 0; i < nr_pages; i += pages_per_kpage) + free_page((unsigned long)(queue->queue_pages)[i]); + + kfree(queue->queue_pages); +} + +struct ehea_cq *ehea_create_cq(struct ehea_adapter *adapter, + int nr_of_cqe, u64 eq_handle, u32 cq_token) +{ + struct ehea_cq *cq; + u64 hret, rpage; + u32 counter; + int ret; + void *vpage; + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + goto out_nomem; + + cq->attr.max_nr_of_cqes = nr_of_cqe; + cq->attr.cq_token = cq_token; + cq->attr.eq_handle = eq_handle; + + cq->adapter = adapter; + + hret = ehea_h_alloc_resource_cq(adapter->handle, &cq->attr, + &cq->fw_handle, &cq->epas); + if (hret != H_SUCCESS) { + pr_err("alloc_resource_cq failed\n"); + goto 
out_freemem; + } + + ret = hw_queue_ctor(&cq->hw_queue, cq->attr.nr_pages, + EHEA_PAGESIZE, sizeof(struct ehea_cqe)); + if (ret) + goto out_freeres; + + for (counter = 0; counter < cq->attr.nr_pages; counter++) { + vpage = hw_qpageit_get_inc(&cq->hw_queue); + if (!vpage) { + pr_err("hw_qpageit_get_inc failed\n"); + goto out_kill_hwq; + } + + rpage = __pa(vpage); + hret = ehea_h_register_rpage(adapter->handle, + 0, EHEA_CQ_REGISTER_ORIG, + cq->fw_handle, rpage, 1); + if (hret < H_SUCCESS) { + pr_err("register_rpage_cq failed ehea_cq=%p hret=%llx counter=%i act_pages=%i\n", + cq, hret, counter, cq->attr.nr_pages); + goto out_kill_hwq; + } + + if (counter == (cq->attr.nr_pages - 1)) { + vpage = hw_qpageit_get_inc(&cq->hw_queue); + + if ((hret != H_SUCCESS) || (vpage)) { + pr_err("registration of pages not complete hret=%llx\n", + hret); + goto out_kill_hwq; + } + } else { + if (hret != H_PAGE_REGISTERED) { + pr_err("CQ: registration of page failed hret=%llx\n", + hret); + goto out_kill_hwq; + } + } + } + + hw_qeit_reset(&cq->hw_queue); + ehea_reset_cq_ep(cq); + ehea_reset_cq_n1(cq); + + return cq; + +out_kill_hwq: + hw_queue_dtor(&cq->hw_queue); + +out_freeres: + ehea_h_free_resource(adapter->handle, cq->fw_handle, FORCE_FREE); + +out_freemem: + kfree(cq); + +out_nomem: + return NULL; +} + +static u64 ehea_destroy_cq_res(struct ehea_cq *cq, u64 force) +{ + u64 hret; + u64 adapter_handle = cq->adapter->handle; + + /* deregister all previous registered pages */ + hret = ehea_h_free_resource(adapter_handle, cq->fw_handle, force); + if (hret != H_SUCCESS) + return hret; + + hw_queue_dtor(&cq->hw_queue); + kfree(cq); + + return hret; +} + +int ehea_destroy_cq(struct ehea_cq *cq) +{ + u64 hret, aer, aerr; + if (!cq) + return 0; + + hcp_epas_dtor(&cq->epas); + hret = ehea_destroy_cq_res(cq, NORMAL_FREE); + if (hret == H_R_STATE) { + ehea_error_data(cq->adapter, cq->fw_handle, &aer, &aerr); + hret = ehea_destroy_cq_res(cq, FORCE_FREE); + } + + if (hret != H_SUCCESS) { + 
pr_err("destroy CQ failed\n"); + return -EIO; + } + + return 0; +} + +struct ehea_eq *ehea_create_eq(struct ehea_adapter *adapter, + const enum ehea_eq_type type, + const u32 max_nr_of_eqes, const u8 eqe_gen) +{ + int ret, i; + u64 hret, rpage; + void *vpage; + struct ehea_eq *eq; + + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) + return NULL; + + eq->adapter = adapter; + eq->attr.type = type; + eq->attr.max_nr_of_eqes = max_nr_of_eqes; + eq->attr.eqe_gen = eqe_gen; + spin_lock_init(&eq->spinlock); + + hret = ehea_h_alloc_resource_eq(adapter->handle, + &eq->attr, &eq->fw_handle); + if (hret != H_SUCCESS) { + pr_err("alloc_resource_eq failed\n"); + goto out_freemem; + } + + ret = hw_queue_ctor(&eq->hw_queue, eq->attr.nr_pages, + EHEA_PAGESIZE, sizeof(struct ehea_eqe)); + if (ret) { + pr_err("can't allocate eq pages\n"); + goto out_freeres; + } + + for (i = 0; i < eq->attr.nr_pages; i++) { + vpage = hw_qpageit_get_inc(&eq->hw_queue); + if (!vpage) { + pr_err("hw_qpageit_get_inc failed\n"); + hret = H_RESOURCE; + goto out_kill_hwq; + } + + rpage = __pa(vpage); + + hret = ehea_h_register_rpage(adapter->handle, 0, + EHEA_EQ_REGISTER_ORIG, + eq->fw_handle, rpage, 1); + + if (i == (eq->attr.nr_pages - 1)) { + /* last page */ + vpage = hw_qpageit_get_inc(&eq->hw_queue); + if ((hret != H_SUCCESS) || (vpage)) + goto out_kill_hwq; + + } else { + if (hret != H_PAGE_REGISTERED) + goto out_kill_hwq; + + } + } + + hw_qeit_reset(&eq->hw_queue); + return eq; + +out_kill_hwq: + hw_queue_dtor(&eq->hw_queue); + +out_freeres: + ehea_h_free_resource(adapter->handle, eq->fw_handle, FORCE_FREE); + +out_freemem: + kfree(eq); + return NULL; +} + +struct ehea_eqe *ehea_poll_eq(struct ehea_eq *eq) +{ + struct ehea_eqe *eqe; + unsigned long flags; + + spin_lock_irqsave(&eq->spinlock, flags); + eqe = hw_eqit_eq_get_inc_valid(&eq->hw_queue); + spin_unlock_irqrestore(&eq->spinlock, flags); + + return eqe; +} + +static u64 ehea_destroy_eq_res(struct ehea_eq *eq, u64 force) +{ + u64 hret; + 
unsigned long flags; + + spin_lock_irqsave(&eq->spinlock, flags); + + hret = ehea_h_free_resource(eq->adapter->handle, eq->fw_handle, force); + spin_unlock_irqrestore(&eq->spinlock, flags); + + if (hret != H_SUCCESS) + return hret; + + hw_queue_dtor(&eq->hw_queue); + kfree(eq); + + return hret; +} + +int ehea_destroy_eq(struct ehea_eq *eq) +{ + u64 hret, aer, aerr; + if (!eq) + return 0; + + hcp_epas_dtor(&eq->epas); + + hret = ehea_destroy_eq_res(eq, NORMAL_FREE); + if (hret == H_R_STATE) { + ehea_error_data(eq->adapter, eq->fw_handle, &aer, &aerr); + hret = ehea_destroy_eq_res(eq, FORCE_FREE); + } + + if (hret != H_SUCCESS) { + pr_err("destroy EQ failed\n"); + return -EIO; + } + + return 0; +} + +/* allocates memory for a queue and registers pages in phyp */ +static int ehea_qp_alloc_register(struct ehea_qp *qp, struct hw_queue *hw_queue, + int nr_pages, int wqe_size, int act_nr_sges, + struct ehea_adapter *adapter, int h_call_q_selector) +{ + u64 hret, rpage; + int ret, cnt; + void *vpage; + + ret = hw_queue_ctor(hw_queue, nr_pages, EHEA_PAGESIZE, wqe_size); + if (ret) + return ret; + + for (cnt = 0; cnt < nr_pages; cnt++) { + vpage = hw_qpageit_get_inc(hw_queue); + if (!vpage) { + pr_err("hw_qpageit_get_inc failed\n"); + goto out_kill_hwq; + } + rpage = __pa(vpage); + hret = ehea_h_register_rpage(adapter->handle, + 0, h_call_q_selector, + qp->fw_handle, rpage, 1); + if (hret < H_SUCCESS) { + pr_err("register_rpage_qp failed\n"); + goto out_kill_hwq; + } + } + hw_qeit_reset(hw_queue); + return 0; + +out_kill_hwq: + hw_queue_dtor(hw_queue); + return -EIO; +} + +static inline u32 map_wqe_size(u8 wqe_enc_size) +{ + return 128 << wqe_enc_size; +} + +struct ehea_qp *ehea_create_qp(struct ehea_adapter *adapter, + u32 pd, struct ehea_qp_init_attr *init_attr) +{ + int ret; + u64 hret; + struct ehea_qp *qp; + u32 wqe_size_in_bytes_sq, wqe_size_in_bytes_rq1; + u32 wqe_size_in_bytes_rq2, wqe_size_in_bytes_rq3; + + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + 
return NULL; + + qp->adapter = adapter; + + hret = ehea_h_alloc_resource_qp(adapter->handle, init_attr, pd, + &qp->fw_handle, &qp->epas); + if (hret != H_SUCCESS) { + pr_err("ehea_h_alloc_resource_qp failed\n"); + goto out_freemem; + } + + wqe_size_in_bytes_sq = map_wqe_size(init_attr->act_wqe_size_enc_sq); + wqe_size_in_bytes_rq1 = map_wqe_size(init_attr->act_wqe_size_enc_rq1); + wqe_size_in_bytes_rq2 = map_wqe_size(init_attr->act_wqe_size_enc_rq2); + wqe_size_in_bytes_rq3 = map_wqe_size(init_attr->act_wqe_size_enc_rq3); + + ret = ehea_qp_alloc_register(qp, &qp->hw_squeue, init_attr->nr_sq_pages, + wqe_size_in_bytes_sq, + init_attr->act_wqe_size_enc_sq, adapter, + 0); + if (ret) { + pr_err("can't register for sq ret=%x\n", ret); + goto out_freeres; + } + + ret = ehea_qp_alloc_register(qp, &qp->hw_rqueue1, + init_attr->nr_rq1_pages, + wqe_size_in_bytes_rq1, + init_attr->act_wqe_size_enc_rq1, + adapter, 1); + if (ret) { + pr_err("can't register for rq1 ret=%x\n", ret); + goto out_kill_hwsq; + } + + if (init_attr->rq_count > 1) { + ret = ehea_qp_alloc_register(qp, &qp->hw_rqueue2, + init_attr->nr_rq2_pages, + wqe_size_in_bytes_rq2, + init_attr->act_wqe_size_enc_rq2, + adapter, 2); + if (ret) { + pr_err("can't register for rq2 ret=%x\n", ret); + goto out_kill_hwr1q; + } + } + + if (init_attr->rq_count > 2) { + ret = ehea_qp_alloc_register(qp, &qp->hw_rqueue3, + init_attr->nr_rq3_pages, + wqe_size_in_bytes_rq3, + init_attr->act_wqe_size_enc_rq3, + adapter, 3); + if (ret) { + pr_err("can't register for rq3 ret=%x\n", ret); + goto out_kill_hwr2q; + } + } + + qp->init_attr = *init_attr; + + return qp; + +out_kill_hwr2q: + hw_queue_dtor(&qp->hw_rqueue2); + +out_kill_hwr1q: + hw_queue_dtor(&qp->hw_rqueue1); + +out_kill_hwsq: + hw_queue_dtor(&qp->hw_squeue); + +out_freeres: + ehea_h_disable_and_get_hea(adapter->handle, qp->fw_handle); + ehea_h_free_resource(adapter->handle, qp->fw_handle, FORCE_FREE); + +out_freemem: + kfree(qp); + return NULL; +} + +static u64 
ehea_destroy_qp_res(struct ehea_qp *qp, u64 force) +{ + u64 hret; + struct ehea_qp_init_attr *qp_attr = &qp->init_attr; + + + ehea_h_disable_and_get_hea(qp->adapter->handle, qp->fw_handle); + hret = ehea_h_free_resource(qp->adapter->handle, qp->fw_handle, force); + if (hret != H_SUCCESS) + return hret; + + hw_queue_dtor(&qp->hw_squeue); + hw_queue_dtor(&qp->hw_rqueue1); + + if (qp_attr->rq_count > 1) + hw_queue_dtor(&qp->hw_rqueue2); + if (qp_attr->rq_count > 2) + hw_queue_dtor(&qp->hw_rqueue3); + kfree(qp); + + return hret; +} + +int ehea_destroy_qp(struct ehea_qp *qp) +{ + u64 hret, aer, aerr; + if (!qp) + return 0; + + hcp_epas_dtor(&qp->epas); + + hret = ehea_destroy_qp_res(qp, NORMAL_FREE); + if (hret == H_R_STATE) { + ehea_error_data(qp->adapter, qp->fw_handle, &aer, &aerr); + hret = ehea_destroy_qp_res(qp, FORCE_FREE); + } + + if (hret != H_SUCCESS) { + pr_err("destroy QP failed\n"); + return -EIO; + } + + return 0; +} + +static inline int ehea_calc_index(unsigned long i, unsigned long s) +{ + return (i >> s) & EHEA_INDEX_MASK; +} + +static inline int ehea_init_top_bmap(struct ehea_top_bmap *ehea_top_bmap, + int dir) +{ + if (!ehea_top_bmap->dir[dir]) { + ehea_top_bmap->dir[dir] = + kzalloc(sizeof(struct ehea_dir_bmap), GFP_KERNEL); + if (!ehea_top_bmap->dir[dir]) + return -ENOMEM; + } + return 0; +} + +static inline int ehea_init_bmap(struct ehea_bmap *ehea_bmap, int top, int dir) +{ + if (!ehea_bmap->top[top]) { + ehea_bmap->top[top] = + kzalloc(sizeof(struct ehea_top_bmap), GFP_KERNEL); + if (!ehea_bmap->top[top]) + return -ENOMEM; + } + return ehea_init_top_bmap(ehea_bmap->top[top], dir); +} + +static DEFINE_MUTEX(ehea_busmap_mutex); +static unsigned long ehea_mr_len; + +#define EHEA_BUSMAP_ADD_SECT 1 +#define EHEA_BUSMAP_REM_SECT 0 + +static void ehea_rebuild_busmap(void) +{ + u64 vaddr = EHEA_BUSMAP_START; + int top, dir, idx; + + for (top = 0; top < EHEA_MAP_ENTRIES; top++) { + struct ehea_top_bmap *ehea_top; + int valid_dir_entries = 0; + + if 
(!ehea_bmap->top[top]) + continue; + ehea_top = ehea_bmap->top[top]; + for (dir = 0; dir < EHEA_MAP_ENTRIES; dir++) { + struct ehea_dir_bmap *ehea_dir; + int valid_entries = 0; + + if (!ehea_top->dir[dir]) + continue; + valid_dir_entries++; + ehea_dir = ehea_top->dir[dir]; + for (idx = 0; idx < EHEA_MAP_ENTRIES; idx++) { + if (!ehea_dir->ent[idx]) + continue; + valid_entries++; + ehea_dir->ent[idx] = vaddr; + vaddr += EHEA_SECTSIZE; + } + if (!valid_entries) { + ehea_top->dir[dir] = NULL; + kfree(ehea_dir); + } + } + if (!valid_dir_entries) { + ehea_bmap->top[top] = NULL; + kfree(ehea_top); + } + } +} + +static int ehea_update_busmap(unsigned long pfn, unsigned long nr_pages, int add) +{ + unsigned long i, start_section, end_section; + + if (!nr_pages) + return 0; + + if (!ehea_bmap) { + ehea_bmap = kzalloc(sizeof(struct ehea_bmap), GFP_KERNEL); + if (!ehea_bmap) + return -ENOMEM; + } + + start_section = (pfn * PAGE_SIZE) / EHEA_SECTSIZE; + end_section = start_section + ((nr_pages * PAGE_SIZE) / EHEA_SECTSIZE); + /* Mark entries as valid or invalid only; address is assigned later */ + for (i = start_section; i < end_section; i++) { + u64 flag; + int top = ehea_calc_index(i, EHEA_TOP_INDEX_SHIFT); + int dir = ehea_calc_index(i, EHEA_DIR_INDEX_SHIFT); + int idx = i & EHEA_INDEX_MASK; + + if (add) { + int ret = ehea_init_bmap(ehea_bmap, top, dir); + if (ret) + return ret; + flag = 1; /* valid */ + ehea_mr_len += EHEA_SECTSIZE; + } else { + if (!ehea_bmap->top[top]) + continue; + if (!ehea_bmap->top[top]->dir[dir]) + continue; + flag = 0; /* invalid */ + ehea_mr_len -= EHEA_SECTSIZE; + } + + ehea_bmap->top[top]->dir[dir]->ent[idx] = flag; + } + ehea_rebuild_busmap(); /* Assign contiguous addresses for mr */ + return 0; +} + +int ehea_add_sect_bmap(unsigned long pfn, unsigned long nr_pages) +{ + int ret; + + mutex_lock(&ehea_busmap_mutex); + ret = ehea_update_busmap(pfn, nr_pages, EHEA_BUSMAP_ADD_SECT); + mutex_unlock(&ehea_busmap_mutex); + return ret; +} + +int 
ehea_rem_sect_bmap(unsigned long pfn, unsigned long nr_pages) +{ + int ret; + + mutex_lock(&ehea_busmap_mutex); + ret = ehea_update_busmap(pfn, nr_pages, EHEA_BUSMAP_REM_SECT); + mutex_unlock(&ehea_busmap_mutex); + return ret; +} + +static int ehea_is_hugepage(unsigned long pfn) +{ + if (pfn & EHEA_HUGEPAGE_PFN_MASK) + return 0; + + if (page_shift(pfn_to_page(pfn)) != EHEA_HUGEPAGESHIFT) + return 0; + + return 1; +} + +static int ehea_create_busmap_callback(unsigned long initial_pfn, + unsigned long total_nr_pages, void *arg) +{ + int ret; + unsigned long pfn, start_pfn, end_pfn, nr_pages; + + if ((total_nr_pages * PAGE_SIZE) < EHEA_HUGEPAGE_SIZE) + return ehea_update_busmap(initial_pfn, total_nr_pages, + EHEA_BUSMAP_ADD_SECT); + + /* Given chunk is >= 16GB -> check for hugepages */ + start_pfn = initial_pfn; + end_pfn = initial_pfn + total_nr_pages; + pfn = start_pfn; + + while (pfn < end_pfn) { + if (ehea_is_hugepage(pfn)) { + /* Add mem found in front of the hugepage */ + nr_pages = pfn - start_pfn; + ret = ehea_update_busmap(start_pfn, nr_pages, + EHEA_BUSMAP_ADD_SECT); + if (ret) + return ret; + + /* Skip the hugepage */ + pfn += (EHEA_HUGEPAGE_SIZE / PAGE_SIZE); + start_pfn = pfn; + } else + pfn += (EHEA_SECTSIZE / PAGE_SIZE); + } + + /* Add mem found behind the hugepage(s) */ + nr_pages = pfn - start_pfn; + return ehea_update_busmap(start_pfn, nr_pages, EHEA_BUSMAP_ADD_SECT); +} + +int ehea_create_busmap(void) +{ + int ret; + + mutex_lock(&ehea_busmap_mutex); + ehea_mr_len = 0; + ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, + ehea_create_busmap_callback); + mutex_unlock(&ehea_busmap_mutex); + return ret; +} + +void ehea_destroy_busmap(void) +{ + int top, dir; + mutex_lock(&ehea_busmap_mutex); + if (!ehea_bmap) + goto out_destroy; + + for (top = 0; top < EHEA_MAP_ENTRIES; top++) { + if (!ehea_bmap->top[top]) + continue; + + for (dir = 0; dir < EHEA_MAP_ENTRIES; dir++) { + if (!ehea_bmap->top[top]->dir[dir]) + continue; + + 
kfree(ehea_bmap->top[top]->dir[dir]); + } + + kfree(ehea_bmap->top[top]); + } + + kfree(ehea_bmap); + ehea_bmap = NULL; +out_destroy: + mutex_unlock(&ehea_busmap_mutex); +} + +u64 ehea_map_vaddr(void *caddr) +{ + int top, dir, idx; + unsigned long index, offset; + + if (!ehea_bmap) + return EHEA_INVAL_ADDR; + + index = __pa(caddr) >> SECTION_SIZE_BITS; + top = (index >> EHEA_TOP_INDEX_SHIFT) & EHEA_INDEX_MASK; + if (!ehea_bmap->top[top]) + return EHEA_INVAL_ADDR; + + dir = (index >> EHEA_DIR_INDEX_SHIFT) & EHEA_INDEX_MASK; + if (!ehea_bmap->top[top]->dir[dir]) + return EHEA_INVAL_ADDR; + + idx = index & EHEA_INDEX_MASK; + if (!ehea_bmap->top[top]->dir[dir]->ent[idx]) + return EHEA_INVAL_ADDR; + + offset = (unsigned long)caddr & (EHEA_SECTSIZE - 1); + return ehea_bmap->top[top]->dir[dir]->ent[idx] | offset; +} + +static inline void *ehea_calc_sectbase(int top, int dir, int idx) +{ + unsigned long ret = idx; + ret |= dir << EHEA_DIR_INDEX_SHIFT; + ret |= top << EHEA_TOP_INDEX_SHIFT; + return __va(ret << SECTION_SIZE_BITS); +} + +static u64 ehea_reg_mr_section(int top, int dir, int idx, u64 *pt, + struct ehea_adapter *adapter, + struct ehea_mr *mr) +{ + void *pg; + u64 j, m, hret; + unsigned long k = 0; + u64 pt_abs = __pa(pt); + + void *sectbase = ehea_calc_sectbase(top, dir, idx); + + for (j = 0; j < (EHEA_PAGES_PER_SECTION / EHEA_MAX_RPAGE); j++) { + + for (m = 0; m < EHEA_MAX_RPAGE; m++) { + pg = sectbase + ((k++) * EHEA_PAGESIZE); + pt[m] = __pa(pg); + } + hret = ehea_h_register_rpage_mr(adapter->handle, mr->handle, 0, + 0, pt_abs, EHEA_MAX_RPAGE); + + if ((hret != H_SUCCESS) && + (hret != H_PAGE_REGISTERED)) { + ehea_h_free_resource(adapter->handle, mr->handle, + FORCE_FREE); + pr_err("register_rpage_mr failed\n"); + return hret; + } + } + return hret; +} + +static u64 ehea_reg_mr_sections(int top, int dir, u64 *pt, + struct ehea_adapter *adapter, + struct ehea_mr *mr) +{ + u64 hret = H_SUCCESS; + int idx; + + for (idx = 0; idx < EHEA_MAP_ENTRIES; idx++) { + if 
(!ehea_bmap->top[top]->dir[dir]->ent[idx]) + continue; + + hret = ehea_reg_mr_section(top, dir, idx, pt, adapter, mr); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +static u64 ehea_reg_mr_dir_sections(int top, u64 *pt, + struct ehea_adapter *adapter, + struct ehea_mr *mr) +{ + u64 hret = H_SUCCESS; + int dir; + + for (dir = 0; dir < EHEA_MAP_ENTRIES; dir++) { + if (!ehea_bmap->top[top]->dir[dir]) + continue; + + hret = ehea_reg_mr_sections(top, dir, pt, adapter, mr); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +int ehea_reg_kernel_mr(struct ehea_adapter *adapter, struct ehea_mr *mr) +{ + int ret; + u64 *pt; + u64 hret; + u32 acc_ctrl = EHEA_MR_ACC_CTRL; + + unsigned long top; + + pt = (void *)get_zeroed_page(GFP_KERNEL); + if (!pt) { + pr_err("no mem\n"); + ret = -ENOMEM; + goto out; + } + + hret = ehea_h_alloc_resource_mr(adapter->handle, EHEA_BUSMAP_START, + ehea_mr_len, acc_ctrl, adapter->pd, + &mr->handle, &mr->lkey); + + if (hret != H_SUCCESS) { + pr_err("alloc_resource_mr failed\n"); + ret = -EIO; + goto out; + } + + if (!ehea_bmap) { + ehea_h_free_resource(adapter->handle, mr->handle, FORCE_FREE); + pr_err("no busmap available\n"); + ret = -EIO; + goto out; + } + + for (top = 0; top < EHEA_MAP_ENTRIES; top++) { + if (!ehea_bmap->top[top]) + continue; + + hret = ehea_reg_mr_dir_sections(top, pt, adapter, mr); + if((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) + break; + } + + if (hret != H_SUCCESS) { + ehea_h_free_resource(adapter->handle, mr->handle, FORCE_FREE); + pr_err("registering mr failed\n"); + ret = -EIO; + goto out; + } + + mr->vaddr = EHEA_BUSMAP_START; + mr->adapter = adapter; + ret = 0; +out: + free_page((unsigned long)pt); + return ret; +} + +int ehea_rem_mr(struct ehea_mr *mr) +{ + u64 hret; + + if (!mr || !mr->adapter) + return -EINVAL; + + hret = ehea_h_free_resource(mr->adapter->handle, mr->handle, + FORCE_FREE); + if (hret != 
H_SUCCESS) { + pr_err("destroy MR failed\n"); + return -EIO; + } + + return 0; +} + +int ehea_gen_smr(struct ehea_adapter *adapter, struct ehea_mr *old_mr, + struct ehea_mr *shared_mr) +{ + u64 hret; + + hret = ehea_h_register_smr(adapter->handle, old_mr->handle, + old_mr->vaddr, EHEA_MR_ACC_CTRL, + adapter->pd, shared_mr); + if (hret != H_SUCCESS) + return -EIO; + + shared_mr->adapter = adapter; + + return 0; +} + +static void print_error_data(u64 *data) +{ + int length; + u64 type = EHEA_BMASK_GET(ERROR_DATA_TYPE, data[2]); + u64 resource = data[1]; + + length = EHEA_BMASK_GET(ERROR_DATA_LENGTH, data[0]); + + if (length > EHEA_PAGESIZE) + length = EHEA_PAGESIZE; + + if (type == EHEA_AER_RESTYPE_QP) + pr_err("QP (resource=%llX) state: AER=0x%llX, AERR=0x%llX, port=%llX\n", + resource, data[6], data[12], data[22]); + else if (type == EHEA_AER_RESTYPE_CQ) + pr_err("CQ (resource=%llX) state: AER=0x%llX\n", + resource, data[6]); + else if (type == EHEA_AER_RESTYPE_EQ) + pr_err("EQ (resource=%llX) state: AER=0x%llX\n", + resource, data[6]); + + ehea_dump(data, length, "error data"); +} + +u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle, + u64 *aer, u64 *aerr) +{ + unsigned long ret; + u64 *rblock; + u64 type = 0; + + rblock = (void *)get_zeroed_page(GFP_KERNEL); + if (!rblock) { + pr_err("Cannot allocate rblock memory\n"); + goto out; + } + + ret = ehea_h_error_data(adapter->handle, res_handle, rblock); + + if (ret == H_SUCCESS) { + type = EHEA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); + *aer = rblock[6]; + *aerr = rblock[12]; + print_error_data(rblock); + } else if (ret == H_R_STATE) { + pr_err("No error data available: %llX\n", res_handle); + } else + pr_err("Error data could not be fetched: %llX\n", res_handle); + + free_page((unsigned long)rblock); +out: + return type; +} diff --git a/drivers/net/ethernet/ibm/ehea/ehea_qmr.h b/drivers/net/ethernet/ibm/ehea/ehea_qmr.h new file mode 100644 index 000000000..7c7cccd82 --- /dev/null +++ 
b/drivers/net/ethernet/ibm/ehea/ehea_qmr.h @@ -0,0 +1,390 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/drivers/net/ethernet/ibm/ehea/ehea_qmr.h + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + */ + +#ifndef __EHEA_QMR_H__ +#define __EHEA_QMR_H__ + +#include <linux/prefetch.h> +#include "ehea.h" +#include "ehea_hw.h" + +/* + * page size of ehea hardware queues + */ + +#define EHEA_PAGESHIFT 12 +#define EHEA_PAGESIZE (1UL << EHEA_PAGESHIFT) +#define EHEA_SECTSIZE (1UL << 24) +#define EHEA_PAGES_PER_SECTION (EHEA_SECTSIZE >> EHEA_PAGESHIFT) +#define EHEA_HUGEPAGESHIFT 34 +#define EHEA_HUGEPAGE_SIZE (1UL << EHEA_HUGEPAGESHIFT) +#define EHEA_HUGEPAGE_PFN_MASK ((EHEA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) + +#if ((1UL << SECTION_SIZE_BITS) < EHEA_SECTSIZE) +#error eHEA module cannot work if kernel sectionsize < ehea sectionsize +#endif + +/* Some abbreviations used here: + * + * WQE - Work Queue Entry + * SWQE - Send Work Queue Entry + * RWQE - Receive Work Queue Entry + * CQE - Completion Queue Entry + * EQE - Event Queue Entry + * MR - Memory Region + */ + +/* Use of WR_ID field for EHEA */ +#define EHEA_WR_ID_COUNT EHEA_BMASK_IBM(0, 19) +#define EHEA_WR_ID_TYPE EHEA_BMASK_IBM(20, 23) +#define EHEA_SWQE2_TYPE 0x1 +#define EHEA_SWQE3_TYPE 0x2 +#define EHEA_RWQE2_TYPE 0x3 +#define EHEA_RWQE3_TYPE 0x4 +#define EHEA_WR_ID_INDEX EHEA_BMASK_IBM(24, 47) +#define EHEA_WR_ID_REFILL EHEA_BMASK_IBM(48, 63) + +struct ehea_vsgentry { + u64 vaddr; + u32 l_key; + u32 len; +}; + +/* maximum number of sg entries allowed in a WQE */ +#define EHEA_MAX_WQE_SG_ENTRIES 252 +#define SWQE2_MAX_IMM (0xD0 - 0x30) +#define SWQE3_MAX_IMM 224 + +/* tx control flags for swqe */ +#define EHEA_SWQE_CRC 0x8000 +#define EHEA_SWQE_IP_CHECKSUM 0x4000 +#define EHEA_SWQE_TCP_CHECKSUM 0x2000 +#define 
EHEA_SWQE_TSO 0x1000 +#define EHEA_SWQE_SIGNALLED_COMPLETION 0x0800 +#define EHEA_SWQE_VLAN_INSERT 0x0400 +#define EHEA_SWQE_IMM_DATA_PRESENT 0x0200 +#define EHEA_SWQE_DESCRIPTORS_PRESENT 0x0100 +#define EHEA_SWQE_WRAP_CTL_REC 0x0080 +#define EHEA_SWQE_WRAP_CTL_FORCE 0x0040 +#define EHEA_SWQE_BIND 0x0020 +#define EHEA_SWQE_PURGE 0x0010 + +/* sizeof(struct ehea_swqe) less the union */ +#define SWQE_HEADER_SIZE 32 + +struct ehea_swqe { + u64 wr_id; + u16 tx_control; + u16 vlan_tag; + u8 reserved1; + u8 ip_start; + u8 ip_end; + u8 immediate_data_length; + u8 tcp_offset; + u8 reserved2; + u16 reserved2b; + u8 wrap_tag; + u8 descriptors; /* number of valid descriptors in WQE */ + u16 reserved3; + u16 reserved4; + u16 mss; + u32 reserved5; + union { + /* Send WQE Format 1 */ + struct { + struct ehea_vsgentry sg_list[EHEA_MAX_WQE_SG_ENTRIES]; + } no_immediate_data; + + /* Send WQE Format 2 */ + struct { + struct ehea_vsgentry sg_entry; + /* 0x30 */ + u8 immediate_data[SWQE2_MAX_IMM]; + /* 0xd0 */ + struct ehea_vsgentry sg_list[EHEA_MAX_WQE_SG_ENTRIES-1]; + } immdata_desc __packed; + + /* Send WQE Format 3 */ + struct { + u8 immediate_data[SWQE3_MAX_IMM]; + } immdata_nodesc; + } u; +}; + +struct ehea_rwqe { + u64 wr_id; /* work request ID */ + u8 reserved1[5]; + u8 data_segments; + u16 reserved2; + u64 reserved3; + u64 reserved4; + struct ehea_vsgentry sg_list[EHEA_MAX_WQE_SG_ENTRIES]; +}; + +#define EHEA_CQE_VLAN_TAG_XTRACT 0x0400 + +#define EHEA_CQE_TYPE_RQ 0x60 +#define EHEA_CQE_STAT_ERR_MASK 0x700F +#define EHEA_CQE_STAT_FAT_ERR_MASK 0xF +#define EHEA_CQE_BLIND_CKSUM 0x8000 +#define EHEA_CQE_STAT_ERR_TCP 0x4000 +#define EHEA_CQE_STAT_ERR_IP 0x2000 +#define EHEA_CQE_STAT_ERR_CRC 0x1000 + +/* Defines which bad send cqe stati lead to a port reset */ +#define EHEA_CQE_STAT_RESET_MASK 0x0002 + +struct ehea_cqe { + u64 wr_id; /* work request ID from WQE */ + u8 type; + u8 valid; + u16 status; + u16 reserved1; + u16 num_bytes_transfered; + u16 vlan_tag; + u16 
inet_checksum_value; + u8 reserved2; + u8 header_length; + u16 reserved3; + u16 page_offset; + u16 wqe_count; + u32 qp_token; + u32 timestamp; + u32 reserved4; + u64 reserved5[3]; +}; + +#define EHEA_EQE_VALID EHEA_BMASK_IBM(0, 0) +#define EHEA_EQE_IS_CQE EHEA_BMASK_IBM(1, 1) +#define EHEA_EQE_IDENTIFIER EHEA_BMASK_IBM(2, 7) +#define EHEA_EQE_QP_CQ_NUMBER EHEA_BMASK_IBM(8, 31) +#define EHEA_EQE_QP_TOKEN EHEA_BMASK_IBM(32, 63) +#define EHEA_EQE_CQ_TOKEN EHEA_BMASK_IBM(32, 63) +#define EHEA_EQE_KEY EHEA_BMASK_IBM(32, 63) +#define EHEA_EQE_PORT_NUMBER EHEA_BMASK_IBM(56, 63) +#define EHEA_EQE_EQ_NUMBER EHEA_BMASK_IBM(48, 63) +#define EHEA_EQE_SM_ID EHEA_BMASK_IBM(48, 63) +#define EHEA_EQE_SM_MECH_NUMBER EHEA_BMASK_IBM(48, 55) +#define EHEA_EQE_SM_PORT_NUMBER EHEA_BMASK_IBM(56, 63) + +#define EHEA_AER_RESTYPE_QP 0x8 +#define EHEA_AER_RESTYPE_CQ 0x4 +#define EHEA_AER_RESTYPE_EQ 0x3 + +/* Defines which affiliated errors lead to a port reset */ +#define EHEA_AER_RESET_MASK 0xFFFFFFFFFEFFFFFFULL +#define EHEA_AERR_RESET_MASK 0xFFFFFFFFFFFFFFFFULL + +struct ehea_eqe { + u64 entry; +}; + +#define ERROR_DATA_LENGTH EHEA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHEA_BMASK_IBM(0, 7) + +static inline void *hw_qeit_calc(struct hw_queue *queue, u64 q_offset) +{ + struct ehea_page *current_page; + + if (q_offset >= queue->queue_length) + q_offset -= queue->queue_length; + current_page = (queue->queue_pages)[q_offset >> EHEA_PAGESHIFT]; + return ¤t_page->entries[q_offset & (EHEA_PAGESIZE - 1)]; +} + +static inline void *hw_qeit_get(struct hw_queue *queue) +{ + return hw_qeit_calc(queue, queue->current_q_offset); +} + +static inline void hw_qeit_inc(struct hw_queue *queue) +{ + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset >= queue->queue_length) { + queue->current_q_offset = 0; + /* toggle the valid flag */ + queue->toggle_state = (~queue->toggle_state) & 1; + } +} + +static inline void *hw_qeit_get_inc(struct hw_queue *queue) +{ + void *retvalue = 
hw_qeit_get(queue); + hw_qeit_inc(queue); + return retvalue; +} + +static inline void *hw_qeit_get_inc_valid(struct hw_queue *queue) +{ + struct ehea_cqe *retvalue = hw_qeit_get(queue); + u8 valid = retvalue->valid; + void *pref; + + if ((valid >> 7) == (queue->toggle_state & 1)) { + /* this is a good one */ + hw_qeit_inc(queue); + pref = hw_qeit_calc(queue, queue->current_q_offset); + prefetch(pref); + prefetch(pref + 128); + } else + retvalue = NULL; + return retvalue; +} + +static inline void *hw_qeit_get_valid(struct hw_queue *queue) +{ + struct ehea_cqe *retvalue = hw_qeit_get(queue); + void *pref; + u8 valid; + + pref = hw_qeit_calc(queue, queue->current_q_offset); + prefetch(pref); + prefetch(pref + 128); + prefetch(pref + 256); + valid = retvalue->valid; + if (!((valid >> 7) == (queue->toggle_state & 1))) + retvalue = NULL; + return retvalue; +} + +static inline void *hw_qeit_reset(struct hw_queue *queue) +{ + queue->current_q_offset = 0; + return hw_qeit_get(queue); +} + +static inline void *hw_qeit_eq_get_inc(struct hw_queue *queue) +{ + u64 last_entry_in_q = queue->queue_length - queue->qe_size; + void *retvalue; + + retvalue = hw_qeit_get(queue); + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset > last_entry_in_q) { + queue->current_q_offset = 0; + queue->toggle_state = (~queue->toggle_state) & 1; + } + return retvalue; +} + +static inline void *hw_eqit_eq_get_inc_valid(struct hw_queue *queue) +{ + void *retvalue = hw_qeit_get(queue); + u32 qe = *(u8 *)retvalue; + if ((qe >> 7) == (queue->toggle_state & 1)) + hw_qeit_eq_get_inc(queue); + else + retvalue = NULL; + return retvalue; +} + +static inline struct ehea_rwqe *ehea_get_next_rwqe(struct ehea_qp *qp, + int rq_nr) +{ + struct hw_queue *queue; + + if (rq_nr == 1) + queue = &qp->hw_rqueue1; + else if (rq_nr == 2) + queue = &qp->hw_rqueue2; + else + queue = &qp->hw_rqueue3; + + return hw_qeit_get_inc(queue); +} + +static inline struct ehea_swqe *ehea_get_swqe(struct ehea_qp 
*my_qp, + int *wqe_index) +{ + struct hw_queue *queue = &my_qp->hw_squeue; + struct ehea_swqe *wqe_p; + + *wqe_index = (queue->current_q_offset) >> (7 + EHEA_SG_SQ); + wqe_p = hw_qeit_get_inc(&my_qp->hw_squeue); + + return wqe_p; +} + +static inline void ehea_post_swqe(struct ehea_qp *my_qp, struct ehea_swqe *swqe) +{ + iosync(); + ehea_update_sqa(my_qp, 1); +} + +static inline struct ehea_cqe *ehea_poll_rq1(struct ehea_qp *qp, int *wqe_index) +{ + struct hw_queue *queue = &qp->hw_rqueue1; + + *wqe_index = (queue->current_q_offset) >> (7 + EHEA_SG_RQ1); + return hw_qeit_get_valid(queue); +} + +static inline void ehea_inc_cq(struct ehea_cq *cq) +{ + hw_qeit_inc(&cq->hw_queue); +} + +static inline void ehea_inc_rq1(struct ehea_qp *qp) +{ + hw_qeit_inc(&qp->hw_rqueue1); +} + +static inline struct ehea_cqe *ehea_poll_cq(struct ehea_cq *my_cq) +{ + return hw_qeit_get_valid(&my_cq->hw_queue); +} + +#define EHEA_CQ_REGISTER_ORIG 0 +#define EHEA_EQ_REGISTER_ORIG 0 + +enum ehea_eq_type { + EHEA_EQ = 0, /* event queue */ + EHEA_NEQ /* notification event queue */ +}; + +struct ehea_eq *ehea_create_eq(struct ehea_adapter *adapter, + enum ehea_eq_type type, + const u32 length, const u8 eqe_gen); + +int ehea_destroy_eq(struct ehea_eq *eq); + +struct ehea_eqe *ehea_poll_eq(struct ehea_eq *eq); + +struct ehea_cq *ehea_create_cq(struct ehea_adapter *adapter, int cqe, + u64 eq_handle, u32 cq_token); + +int ehea_destroy_cq(struct ehea_cq *cq); + +struct ehea_qp *ehea_create_qp(struct ehea_adapter *adapter, u32 pd, + struct ehea_qp_init_attr *init_attr); + +int ehea_destroy_qp(struct ehea_qp *qp); + +int ehea_reg_kernel_mr(struct ehea_adapter *adapter, struct ehea_mr *mr); + +int ehea_gen_smr(struct ehea_adapter *adapter, struct ehea_mr *old_mr, + struct ehea_mr *shared_mr); + +int ehea_rem_mr(struct ehea_mr *mr); + +u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle, + u64 *aer, u64 *aerr); + +int ehea_add_sect_bmap(unsigned long pfn, unsigned long nr_pages); +int 
ehea_rem_sect_bmap(unsigned long pfn, unsigned long nr_pages); +int ehea_create_busmap(void); +void ehea_destroy_busmap(void); +u64 ehea_map_vaddr(void *caddr); + +#endif /* __EHEA_QMR_H__ */ diff --git a/drivers/net/ethernet/ibm/emac/Kconfig b/drivers/net/ethernet/ibm/emac/Kconfig new file mode 100644 index 000000000..c8e5de598 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/Kconfig @@ -0,0 +1,66 @@ +# SPDX-License-Identifier: GPL-2.0-only +config IBM_EMAC + tristate "IBM EMAC Ethernet support" + depends on PPC_DCR + select CRC32 + select PHYLIB + help + This driver supports the IBM EMAC family of Ethernet controllers + typically found on 4xx embedded PowerPC chips, but also on the + Axon southbridge for Cell. + +config IBM_EMAC_RXB + int "Number of receive buffers" + depends on IBM_EMAC + default "128" + +config IBM_EMAC_TXB + int "Number of transmit buffers" + depends on IBM_EMAC + default "64" + +config IBM_EMAC_POLL_WEIGHT + int "MAL NAPI polling weight" + depends on IBM_EMAC + default "32" + +config IBM_EMAC_RX_COPY_THRESHOLD + int "RX skb copy threshold (bytes)" + depends on IBM_EMAC + default "256" + +config IBM_EMAC_DEBUG + bool "Debugging" + depends on IBM_EMAC + default n + +# The options below has to be select'ed by the respective +# processor types or platforms + +config IBM_EMAC_ZMII + bool + default n + +config IBM_EMAC_RGMII + bool + default n + +config IBM_EMAC_TAH + bool + default n + +config IBM_EMAC_EMAC4 + bool + default n + +config IBM_EMAC_NO_FLOW_CTRL + bool + default n + +config IBM_EMAC_MAL_CLR_ICINTSTAT + bool + default n + +config IBM_EMAC_MAL_COMMON_ERR + bool + default n diff --git a/drivers/net/ethernet/ibm/emac/Makefile b/drivers/net/ethernet/ibm/emac/Makefile new file mode 100644 index 000000000..ddf1ce3c8 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the PowerPC 4xx on-chip ethernet driver +# + +obj-$(CONFIG_IBM_EMAC) += ibm_emac.o + +ibm_emac-y 
:= mal.o core.o phy.o +ibm_emac-$(CONFIG_IBM_EMAC_ZMII) += zmii.o +ibm_emac-$(CONFIG_IBM_EMAC_RGMII) += rgmii.o +ibm_emac-$(CONFIG_IBM_EMAC_TAH) += tah.o diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c new file mode 100644 index 000000000..9b08e41cc --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -0,0 +1,3417 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * drivers/net/ethernet/ibm/emac/core.c + * + * Driver for PowerPC 4xx on-chip ethernet controller. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Matt Porter <mporter@kernel.crashing.org> + * (c) 2003 Benjamin Herrenschmidt <benh@kernel.crashing.org> + * Armin Kuster <akuster@mvista.com> + * Johnnie Peters <jpeters@mvista.com> + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/delay.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/crc32.h> +#include <linux/ethtool.h> +#include <linux/mii.h> +#include <linux/bitops.h> +#include <linux/workqueue.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/of_net.h> +#include <linux/of_mdio.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/dma.h> +#include <linux/uaccess.h> +#include <asm/dcr.h> +#include <asm/dcr-regs.h> + +#include "core.h" + +/* + * Lack of dma_unmap_???? calls is intentional. + * + * API-correct usage requires additional support state information to be + * maintained for every RX and TX buffer descriptor (BD). Unfortunately, due to + * EMAC design (e.g. 
TX buffer passed from network stack can be split into + * several BDs, dma_map_single/dma_map_page can be used to map particular BD), + * maintaining such information will add additional overhead. + * Current DMA API implementation for 4xx processors only ensures cache coherency + * and dma_unmap_???? routines are empty and are likely to stay this way. + * I decided to omit dma_unmap_??? calls because I don't want to add additional + * complexity just for the sake of following some abstract API, when it doesn't + * add any real benefit to the driver. I understand that this decision maybe + * controversial, but I really tried to make code API-correct and efficient + * at the same time and didn't come up with code I liked :(. --ebs + */ + +#define DRV_NAME "emac" +#define DRV_VERSION "3.54" +#define DRV_DESC "PPC 4xx OCP EMAC driver" + +MODULE_DESCRIPTION(DRV_DESC); +MODULE_AUTHOR + ("Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>"); +MODULE_LICENSE("GPL"); + +/* minimum number of free TX descriptors required to wake up TX process */ +#define EMAC_TX_WAKEUP_THRESH (NUM_TX_BUFF / 4) + +/* If packet size is less than this number, we allocate small skb and copy packet + * contents into it instead of just sending original big skb up + */ +#define EMAC_RX_COPY_THRESH CONFIG_IBM_EMAC_RX_COPY_THRESHOLD + +/* Since multiple EMACs share MDIO lines in various ways, we need + * to avoid re-using the same PHY ID in cases where the arch didn't + * setup precise phy_map entries + * + * XXX This is something that needs to be reworked as we can have multiple + * EMAC "sets" (multiple ASICs containing several EMACs) though we can + * probably require in that case to have explicit PHY IDs in the device-tree + */ +static u32 busy_phy_map; +static DEFINE_MUTEX(emac_phy_map_lock); + +/* This is the wait queue used to wait on any event related to probe, that + * is discovery of MALs, other EMACs, ZMII/RGMIIs, etc... 
+ */ +static DECLARE_WAIT_QUEUE_HEAD(emac_probe_wait); + +/* Having stable interface names is a doomed idea. However, it would be nice + * if we didn't have completely random interface names at boot too :-) It's + * just a matter of making everybody's life easier. Since we are doing + * threaded probing, it's a bit harder though. The base idea here is that + * we make up a list of all emacs in the device-tree before we register the + * driver. Every emac will then wait for the previous one in the list to + * initialize before itself. We should also keep that list ordered by + * cell_index. + * That list is only 4 entries long, meaning that additional EMACs don't + * get ordering guarantees unless EMAC_BOOT_LIST_SIZE is increased. + */ + +#define EMAC_BOOT_LIST_SIZE 4 +static struct device_node *emac_boot_list[EMAC_BOOT_LIST_SIZE]; + +/* How long should I wait for dependent devices ? */ +#define EMAC_PROBE_DEP_TIMEOUT (HZ * 5) + +/* I don't want to litter system log with timeout errors + * when we have brain-damaged PHY. 
+ */ +static inline void emac_report_timeout_error(struct emac_instance *dev, + const char *error) +{ + if (emac_has_feature(dev, EMAC_FTR_440GX_PHY_CLK_FIX | + EMAC_FTR_460EX_PHY_CLK_FIX | + EMAC_FTR_440EP_PHY_CLK_FIX)) + DBG(dev, "%s" NL, error); + else if (net_ratelimit()) + printk(KERN_ERR "%pOF: %s\n", dev->ofdev->dev.of_node, error); +} + +/* EMAC PHY clock workaround: + * 440EP/440GR has more sane SDR0_MFR register implementation than 440GX, + * which allows controlling each EMAC clock + */ +static inline void emac_rx_clk_tx(struct emac_instance *dev) +{ +#ifdef CONFIG_PPC_DCR_NATIVE + if (emac_has_feature(dev, EMAC_FTR_440EP_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_MFR, + 0, SDR0_MFR_ECS >> dev->cell_index); +#endif +} + +static inline void emac_rx_clk_default(struct emac_instance *dev) +{ +#ifdef CONFIG_PPC_DCR_NATIVE + if (emac_has_feature(dev, EMAC_FTR_440EP_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_MFR, + SDR0_MFR_ECS >> dev->cell_index, 0); +#endif +} + +/* PHY polling intervals */ +#define PHY_POLL_LINK_ON HZ +#define PHY_POLL_LINK_OFF (HZ / 5) + +/* Graceful stop timeouts in us. 
+ * We should allow up to 1 frame time (full-duplex, ignoring collisions) + */ +#define STOP_TIMEOUT_10 1230 +#define STOP_TIMEOUT_100 124 +#define STOP_TIMEOUT_1000 13 +#define STOP_TIMEOUT_1000_JUMBO 73 + +static unsigned char default_mcast_addr[] = { + 0x01, 0x80, 0xC2, 0x00, 0x00, 0x01 +}; + +/* Please, keep in sync with struct ibm_emac_stats/ibm_emac_error_stats */ +static const char emac_stats_keys[EMAC_ETHTOOL_STATS_COUNT][ETH_GSTRING_LEN] = { + "rx_packets", "rx_bytes", "tx_packets", "tx_bytes", "rx_packets_csum", + "tx_packets_csum", "tx_undo", "rx_dropped_stack", "rx_dropped_oom", + "rx_dropped_error", "rx_dropped_resize", "rx_dropped_mtu", + "rx_stopped", "rx_bd_errors", "rx_bd_overrun", "rx_bd_bad_packet", + "rx_bd_runt_packet", "rx_bd_short_event", "rx_bd_alignment_error", + "rx_bd_bad_fcs", "rx_bd_packet_too_long", "rx_bd_out_of_range", + "rx_bd_in_range", "rx_parity", "rx_fifo_overrun", "rx_overrun", + "rx_bad_packet", "rx_runt_packet", "rx_short_event", + "rx_alignment_error", "rx_bad_fcs", "rx_packet_too_long", + "rx_out_of_range", "rx_in_range", "tx_dropped", "tx_bd_errors", + "tx_bd_bad_fcs", "tx_bd_carrier_loss", "tx_bd_excessive_deferral", + "tx_bd_excessive_collisions", "tx_bd_late_collision", + "tx_bd_multple_collisions", "tx_bd_single_collision", + "tx_bd_underrun", "tx_bd_sqe", "tx_parity", "tx_underrun", "tx_sqe", + "tx_errors" +}; + +static irqreturn_t emac_irq(int irq, void *dev_instance); +static void emac_clean_tx_ring(struct emac_instance *dev); +static void __emac_set_multicast_list(struct emac_instance *dev); + +static inline int emac_phy_supports_gige(int phy_mode) +{ + return phy_interface_mode_is_rgmii(phy_mode) || + phy_mode == PHY_INTERFACE_MODE_GMII || + phy_mode == PHY_INTERFACE_MODE_SGMII || + phy_mode == PHY_INTERFACE_MODE_TBI || + phy_mode == PHY_INTERFACE_MODE_RTBI; +} + +static inline int emac_phy_gpcs(int phy_mode) +{ + return phy_mode == PHY_INTERFACE_MODE_SGMII || + phy_mode == PHY_INTERFACE_MODE_TBI || + phy_mode == 
PHY_INTERFACE_MODE_RTBI; +} + +static inline void emac_tx_enable(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r; + + DBG(dev, "tx_enable" NL); + + r = in_be32(&p->mr0); + if (!(r & EMAC_MR0_TXE)) + out_be32(&p->mr0, r | EMAC_MR0_TXE); +} + +static void emac_tx_disable(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r; + + DBG(dev, "tx_disable" NL); + + r = in_be32(&p->mr0); + if (r & EMAC_MR0_TXE) { + int n = dev->stop_timeout; + out_be32(&p->mr0, r & ~EMAC_MR0_TXE); + while (!(in_be32(&p->mr0) & EMAC_MR0_TXI) && n) { + udelay(1); + --n; + } + if (unlikely(!n)) + emac_report_timeout_error(dev, "TX disable timeout"); + } +} + +static void emac_rx_enable(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r; + + if (unlikely(test_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags))) + goto out; + + DBG(dev, "rx_enable" NL); + + r = in_be32(&p->mr0); + if (!(r & EMAC_MR0_RXE)) { + if (unlikely(!(r & EMAC_MR0_RXI))) { + /* Wait if previous async disable is still in progress */ + int n = dev->stop_timeout; + while (!(r = in_be32(&p->mr0) & EMAC_MR0_RXI) && n) { + udelay(1); + --n; + } + if (unlikely(!n)) + emac_report_timeout_error(dev, + "RX disable timeout"); + } + out_be32(&p->mr0, r | EMAC_MR0_RXE); + } + out: + ; +} + +static void emac_rx_disable(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r; + + DBG(dev, "rx_disable" NL); + + r = in_be32(&p->mr0); + if (r & EMAC_MR0_RXE) { + int n = dev->stop_timeout; + out_be32(&p->mr0, r & ~EMAC_MR0_RXE); + while (!(in_be32(&p->mr0) & EMAC_MR0_RXI) && n) { + udelay(1); + --n; + } + if (unlikely(!n)) + emac_report_timeout_error(dev, "RX disable timeout"); + } +} + +static inline void emac_netif_stop(struct emac_instance *dev) +{ + netif_tx_lock_bh(dev->ndev); + netif_addr_lock(dev->ndev); + dev->no_mcast = 1; + netif_addr_unlock(dev->ndev); + netif_tx_unlock_bh(dev->ndev); + netif_trans_update(dev->ndev); 
/* prevent tx timeout */ + mal_poll_disable(dev->mal, &dev->commac); + netif_tx_disable(dev->ndev); +} + +static inline void emac_netif_start(struct emac_instance *dev) +{ + netif_tx_lock_bh(dev->ndev); + netif_addr_lock(dev->ndev); + dev->no_mcast = 0; + if (dev->mcast_pending && netif_running(dev->ndev)) + __emac_set_multicast_list(dev); + netif_addr_unlock(dev->ndev); + netif_tx_unlock_bh(dev->ndev); + + netif_wake_queue(dev->ndev); + + /* NOTE: unconditional netif_wake_queue is only appropriate + * so long as all callers are assured to have free tx slots + * (taken from tg3... though the case where that is wrong is + * not terribly harmful) + */ + mal_poll_enable(dev->mal, &dev->commac); +} + +static inline void emac_rx_disable_async(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r; + + DBG(dev, "rx_disable_async" NL); + + r = in_be32(&p->mr0); + if (r & EMAC_MR0_RXE) + out_be32(&p->mr0, r & ~EMAC_MR0_RXE); +} + +static int emac_reset(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + int n = 20; + bool __maybe_unused try_internal_clock = false; + + DBG(dev, "reset" NL); + + if (!dev->reset_failed) { + /* 40x erratum suggests stopping RX channel before reset, + * we stop TX as well + */ + emac_rx_disable(dev); + emac_tx_disable(dev); + } + +#ifdef CONFIG_PPC_DCR_NATIVE +do_retry: + /* + * PPC460EX/GT Embedded Processor Advanced User's Manual + * section 28.10.1 Mode Register 0 (EMACx_MR0) states: + * Note: The PHY must provide a TX Clk in order to perform a soft reset + * of the EMAC. If none is present, select the internal clock + * (SDR0_ETH_CFG[EMACx_PHY_CLK] = 1). + * After a soft reset, select the external clock. + * + * The AR8035-A PHY Meraki MR24 does not provide a TX Clk if the + * ethernet cable is not attached. This causes the reset to timeout + * and the PHY detection code in emac_init_phy() is unable to + * communicate and detect the AR8035-A PHY. 
As a result, the emac + * driver bails out early and the user has no ethernet. + * In order to stay compatible with existing configurations, the + * driver will temporarily switch to the internal clock, after + * the first reset fails. + */ + if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) { + if (try_internal_clock || (dev->phy_address == 0xffffffff && + dev->phy_map == 0xffffffff)) { + /* No PHY: select internal loop clock before reset */ + dcri_clrset(SDR0, SDR0_ETH_CFG, + 0, SDR0_ETH_CFG_ECS << dev->cell_index); + } else { + /* PHY present: select external clock before reset */ + dcri_clrset(SDR0, SDR0_ETH_CFG, + SDR0_ETH_CFG_ECS << dev->cell_index, 0); + } + } +#endif + + out_be32(&p->mr0, EMAC_MR0_SRST); + while ((in_be32(&p->mr0) & EMAC_MR0_SRST) && n) + --n; + +#ifdef CONFIG_PPC_DCR_NATIVE + if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) { + if (!n && !try_internal_clock) { + /* first attempt has timed out. */ + n = 20; + try_internal_clock = true; + goto do_retry; + } + + if (try_internal_clock || (dev->phy_address == 0xffffffff && + dev->phy_map == 0xffffffff)) { + /* No PHY: restore external clock source after reset */ + dcri_clrset(SDR0, SDR0_ETH_CFG, + SDR0_ETH_CFG_ECS << dev->cell_index, 0); + } + } +#endif + + if (n) { + dev->reset_failed = 0; + return 0; + } else { + emac_report_timeout_error(dev, "reset timeout"); + dev->reset_failed = 1; + return -ETIMEDOUT; + } +} + +static void emac_hash_mc(struct emac_instance *dev) +{ + const int regs = EMAC_XAHT_REGS(dev); + u32 *gaht_base = emac_gaht_base(dev); + u32 gaht_temp[EMAC_XAHT_MAX_REGS]; + struct netdev_hw_addr *ha; + int i; + + DBG(dev, "hash_mc %d" NL, netdev_mc_count(dev->ndev)); + + memset(gaht_temp, 0, sizeof (gaht_temp)); + + netdev_for_each_mc_addr(ha, dev->ndev) { + int slot, reg, mask; + DBG2(dev, "mc %pM" NL, ha->addr); + + slot = EMAC_XAHT_CRC_TO_SLOT(dev, + ether_crc(ETH_ALEN, ha->addr)); + reg = EMAC_XAHT_SLOT_TO_REG(dev, slot); + mask = EMAC_XAHT_SLOT_TO_MASK(dev, 
slot); + + gaht_temp[reg] |= mask; + } + + for (i = 0; i < regs; i++) + out_be32(gaht_base + i, gaht_temp[i]); +} + +static inline u32 emac_iff2rmr(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + u32 r; + + r = EMAC_RMR_SP | EMAC_RMR_SFCS | EMAC_RMR_IAE | EMAC_RMR_BAE; + + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + r |= EMAC4_RMR_BASE; + else + r |= EMAC_RMR_BASE; + + if (ndev->flags & IFF_PROMISC) + r |= EMAC_RMR_PME; + else if (ndev->flags & IFF_ALLMULTI || + (netdev_mc_count(ndev) > EMAC_XAHT_SLOTS(dev))) + r |= EMAC_RMR_PMME; + else if (!netdev_mc_empty(ndev)) + r |= EMAC_RMR_MAE; + + if (emac_has_feature(dev, EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE)) { + r &= ~EMAC4_RMR_MJS_MASK; + r |= EMAC4_RMR_MJS(ndev->mtu); + } + + return r; +} + +static u32 __emac_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_size) +{ + u32 ret = EMAC_MR1_VLE | EMAC_MR1_IST | EMAC_MR1_TR0_MULT; + + DBG2(dev, "__emac_calc_base_mr1" NL); + + switch(tx_size) { + case 2048: + ret |= EMAC_MR1_TFS_2K; + break; + default: + printk(KERN_WARNING "%s: Unknown Tx FIFO size %d\n", + dev->ndev->name, tx_size); + } + + switch(rx_size) { + case 16384: + ret |= EMAC_MR1_RFS_16K; + break; + case 4096: + ret |= EMAC_MR1_RFS_4K; + break; + default: + printk(KERN_WARNING "%s: Unknown Rx FIFO size %d\n", + dev->ndev->name, rx_size); + } + + return ret; +} + +static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_size) +{ + u32 ret = EMAC_MR1_VLE | EMAC_MR1_IST | EMAC4_MR1_TR | + EMAC4_MR1_OBCI(dev->opb_bus_freq / 1000000); + + DBG2(dev, "__emac4_calc_base_mr1" NL); + + switch(tx_size) { + case 16384: + ret |= EMAC4_MR1_TFS_16K; + break; + case 8192: + ret |= EMAC4_MR1_TFS_8K; + break; + case 4096: + ret |= EMAC4_MR1_TFS_4K; + break; + case 2048: + ret |= EMAC4_MR1_TFS_2K; + break; + default: + printk(KERN_WARNING "%s: Unknown Tx FIFO size %d\n", + dev->ndev->name, tx_size); + } + + switch(rx_size) { + case 16384: + ret |= EMAC4_MR1_RFS_16K; 
+ break; + case 8192: + ret |= EMAC4_MR1_RFS_8K; + break; + case 4096: + ret |= EMAC4_MR1_RFS_4K; + break; + case 2048: + ret |= EMAC4_MR1_RFS_2K; + break; + default: + printk(KERN_WARNING "%s: Unknown Rx FIFO size %d\n", + dev->ndev->name, rx_size); + } + + return ret; +} + +static u32 emac_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_size) +{ + return emac_has_feature(dev, EMAC_FTR_EMAC4) ? + __emac4_calc_base_mr1(dev, tx_size, rx_size) : + __emac_calc_base_mr1(dev, tx_size, rx_size); +} + +static inline u32 emac_calc_trtr(struct emac_instance *dev, unsigned int size) +{ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + return ((size >> 6) - 1) << EMAC_TRTR_SHIFT_EMAC4; + else + return ((size >> 6) - 1) << EMAC_TRTR_SHIFT; +} + +static inline u32 emac_calc_rwmr(struct emac_instance *dev, + unsigned int low, unsigned int high) +{ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + return (low << 22) | ( (high & 0x3ff) << 6); + else + return (low << 23) | ( (high & 0x1ff) << 7); +} + +static int emac_configure(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + struct net_device *ndev = dev->ndev; + int tx_size, rx_size, link = netif_carrier_ok(dev->ndev); + u32 r, mr1 = 0; + + DBG(dev, "configure" NL); + + if (!link) { + out_be32(&p->mr1, in_be32(&p->mr1) + | EMAC_MR1_FDE | EMAC_MR1_ILE); + udelay(100); + } else if (emac_reset(dev) < 0) + return -ETIMEDOUT; + + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) + tah_reset(dev->tah_dev); + + DBG(dev, " link = %d duplex = %d, pause = %d, asym_pause = %d\n", + link, dev->phy.duplex, dev->phy.pause, dev->phy.asym_pause); + + /* Default fifo sizes */ + tx_size = dev->tx_fifo_size; + rx_size = dev->rx_fifo_size; + + /* No link, force loopback */ + if (!link) + mr1 = EMAC_MR1_FDE | EMAC_MR1_ILE; + + /* Check for full duplex */ + else if (dev->phy.duplex == DUPLEX_FULL) + mr1 |= EMAC_MR1_FDE | EMAC_MR1_MWSW_001; + + /* Adjust fifo sizes, mr1 and timeouts based on link speed */ + 
dev->stop_timeout = STOP_TIMEOUT_10; + switch (dev->phy.speed) { + case SPEED_1000: + if (emac_phy_gpcs(dev->phy.mode)) { + mr1 |= EMAC_MR1_MF_1000GPCS | EMAC_MR1_MF_IPPA( + (dev->phy.gpcs_address != 0xffffffff) ? + dev->phy.gpcs_address : dev->phy.address); + + /* Put some arbitrary OUI, Manuf & Rev IDs so we can + * identify this GPCS PHY later. + */ + out_be32(&p->u1.emac4.ipcr, 0xdeadbeef); + } else + mr1 |= EMAC_MR1_MF_1000; + + /* Extended fifo sizes */ + tx_size = dev->tx_fifo_size_gige; + rx_size = dev->rx_fifo_size_gige; + + if (dev->ndev->mtu > ETH_DATA_LEN) { + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + mr1 |= EMAC4_MR1_JPSM; + else + mr1 |= EMAC_MR1_JPSM; + dev->stop_timeout = STOP_TIMEOUT_1000_JUMBO; + } else + dev->stop_timeout = STOP_TIMEOUT_1000; + break; + case SPEED_100: + mr1 |= EMAC_MR1_MF_100; + dev->stop_timeout = STOP_TIMEOUT_100; + break; + default: /* make gcc happy */ + break; + } + + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_set_speed(dev->rgmii_dev, dev->rgmii_port, + dev->phy.speed); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_set_speed(dev->zmii_dev, dev->zmii_port, dev->phy.speed); + + /* on 40x erratum forces us to NOT use integrated flow control, + * let's hope it works on 44x ;) + */ + if (!emac_has_feature(dev, EMAC_FTR_NO_FLOW_CONTROL_40x) && + dev->phy.duplex == DUPLEX_FULL) { + if (dev->phy.pause) + mr1 |= EMAC_MR1_EIFC | EMAC_MR1_APP; + else if (dev->phy.asym_pause) + mr1 |= EMAC_MR1_APP; + } + + /* Add base settings & fifo sizes & program MR1 */ + mr1 |= emac_calc_base_mr1(dev, tx_size, rx_size); + out_be32(&p->mr1, mr1); + + /* Set individual MAC address */ + out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]); + out_be32(&p->ialr, (ndev->dev_addr[2] << 24) | + (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) | + ndev->dev_addr[5]); + + /* VLAN Tag Protocol ID */ + out_be32(&p->vtpid, 0x8100); + + /* Receive mode register */ + r = emac_iff2rmr(ndev); + if (r & EMAC_RMR_MAE) + 
emac_hash_mc(dev); + out_be32(&p->rmr, r); + + /* FIFOs thresholds */ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + r = EMAC4_TMR1((dev->mal_burst_size / dev->fifo_entry_size) + 1, + tx_size / 2 / dev->fifo_entry_size); + else + r = EMAC_TMR1((dev->mal_burst_size / dev->fifo_entry_size) + 1, + tx_size / 2 / dev->fifo_entry_size); + out_be32(&p->tmr1, r); + out_be32(&p->trtr, emac_calc_trtr(dev, tx_size / 2)); + + /* PAUSE frame is sent when RX FIFO reaches its high-water mark, + there should be still enough space in FIFO to allow the our link + partner time to process this frame and also time to send PAUSE + frame itself. + + Here is the worst case scenario for the RX FIFO "headroom" + (from "The Switch Book") (100Mbps, without preamble, inter-frame gap): + + 1) One maximum-length frame on TX 1522 bytes + 2) One PAUSE frame time 64 bytes + 3) PAUSE frame decode time allowance 64 bytes + 4) One maximum-length frame on RX 1522 bytes + 5) Round-trip propagation delay of the link (100Mb) 15 bytes + ---------- + 3187 bytes + + I chose to set high-water mark to RX_FIFO_SIZE / 4 (1024 bytes) + low-water mark to RX_FIFO_SIZE / 8 (512 bytes) + */ + r = emac_calc_rwmr(dev, rx_size / 8 / dev->fifo_entry_size, + rx_size / 4 / dev->fifo_entry_size); + out_be32(&p->rwmr, r); + + /* Set PAUSE timer to the maximum */ + out_be32(&p->ptr, 0xffff); + + /* IRQ sources */ + r = EMAC_ISR_OVR | EMAC_ISR_BP | EMAC_ISR_SE | + EMAC_ISR_ALE | EMAC_ISR_BFCS | EMAC_ISR_PTLE | EMAC_ISR_ORE | + EMAC_ISR_IRE | EMAC_ISR_TE; + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + r |= EMAC4_ISR_TXPE | EMAC4_ISR_RXPE /* | EMAC4_ISR_TXUE | + EMAC4_ISR_RXOE | */; + out_be32(&p->iser, r); + + /* We need to take GPCS PHY out of isolate mode after EMAC reset */ + if (emac_phy_gpcs(dev->phy.mode)) { + if (dev->phy.gpcs_address != 0xffffffff) + emac_mii_reset_gpcs(&dev->phy); + else + emac_mii_reset_phy(&dev->phy); + } + + return 0; +} + +static void emac_reinitialize(struct emac_instance *dev) +{ + DBG(dev, 
"reinitialize" NL); + + emac_netif_stop(dev); + if (!emac_configure(dev)) { + emac_tx_enable(dev); + emac_rx_enable(dev); + } + emac_netif_start(dev); +} + +static void emac_full_tx_reset(struct emac_instance *dev) +{ + DBG(dev, "full_tx_reset" NL); + + emac_tx_disable(dev); + mal_disable_tx_channel(dev->mal, dev->mal_tx_chan); + emac_clean_tx_ring(dev); + dev->tx_cnt = dev->tx_slot = dev->ack_slot = 0; + + emac_configure(dev); + + mal_enable_tx_channel(dev->mal, dev->mal_tx_chan); + emac_tx_enable(dev); + emac_rx_enable(dev); +} + +static void emac_reset_work(struct work_struct *work) +{ + struct emac_instance *dev = container_of(work, struct emac_instance, reset_work); + + DBG(dev, "reset_work" NL); + + mutex_lock(&dev->link_lock); + if (dev->opened) { + emac_netif_stop(dev); + emac_full_tx_reset(dev); + emac_netif_start(dev); + } + mutex_unlock(&dev->link_lock); +} + +static void emac_tx_timeout(struct net_device *ndev, unsigned int txqueue) +{ + struct emac_instance *dev = netdev_priv(ndev); + + DBG(dev, "tx_timeout" NL); + + schedule_work(&dev->reset_work); +} + + +static inline int emac_phy_done(struct emac_instance *dev, u32 stacr) +{ + int done = !!(stacr & EMAC_STACR_OC); + + if (emac_has_feature(dev, EMAC_FTR_STACR_OC_INVERT)) + done = !done; + + return done; +}; + +static int __emac_mdio_read(struct emac_instance *dev, u8 id, u8 reg) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r = 0; + int n, err = -ETIMEDOUT; + + mutex_lock(&dev->mdio_lock); + + DBG2(dev, "mdio_read(%02x,%02x)" NL, id, reg); + + /* Enable proper MDIO port */ + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_get_mdio(dev->zmii_dev, dev->zmii_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_get_mdio(dev->rgmii_dev, dev->rgmii_port); + + /* Wait for management interface to become idle */ + n = 20; + while (!emac_phy_done(dev, in_be32(&p->stacr))) { + udelay(1); + if (!--n) { + DBG2(dev, " -> timeout wait idle\n"); + goto bail; + } + } + + /* Issue read command 
*/ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + r = EMAC4_STACR_BASE(dev->opb_bus_freq); + else + r = EMAC_STACR_BASE(dev->opb_bus_freq); + if (emac_has_feature(dev, EMAC_FTR_STACR_OC_INVERT)) + r |= EMAC_STACR_OC; + if (emac_has_feature(dev, EMAC_FTR_HAS_NEW_STACR)) + r |= EMACX_STACR_STAC_READ; + else + r |= EMAC_STACR_STAC_READ; + r |= (reg & EMAC_STACR_PRA_MASK) + | ((id & EMAC_STACR_PCDA_MASK) << EMAC_STACR_PCDA_SHIFT); + out_be32(&p->stacr, r); + + /* Wait for read to complete */ + n = 200; + while (!emac_phy_done(dev, (r = in_be32(&p->stacr)))) { + udelay(1); + if (!--n) { + DBG2(dev, " -> timeout wait complete\n"); + goto bail; + } + } + + if (unlikely(r & EMAC_STACR_PHYE)) { + DBG(dev, "mdio_read(%02x, %02x) failed" NL, id, reg); + err = -EREMOTEIO; + goto bail; + } + + r = ((r >> EMAC_STACR_PHYD_SHIFT) & EMAC_STACR_PHYD_MASK); + + DBG2(dev, "mdio_read -> %04x" NL, r); + err = 0; + bail: + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_put_mdio(dev->rgmii_dev, dev->rgmii_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_put_mdio(dev->zmii_dev, dev->zmii_port); + mutex_unlock(&dev->mdio_lock); + + return err == 0 ? 
r : err; +} + +static void __emac_mdio_write(struct emac_instance *dev, u8 id, u8 reg, + u16 val) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r = 0; + int n; + + mutex_lock(&dev->mdio_lock); + + DBG2(dev, "mdio_write(%02x,%02x,%04x)" NL, id, reg, val); + + /* Enable proper MDIO port */ + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_get_mdio(dev->zmii_dev, dev->zmii_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_get_mdio(dev->rgmii_dev, dev->rgmii_port); + + /* Wait for management interface to be idle */ + n = 20; + while (!emac_phy_done(dev, in_be32(&p->stacr))) { + udelay(1); + if (!--n) { + DBG2(dev, " -> timeout wait idle\n"); + goto bail; + } + } + + /* Issue write command */ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + r = EMAC4_STACR_BASE(dev->opb_bus_freq); + else + r = EMAC_STACR_BASE(dev->opb_bus_freq); + if (emac_has_feature(dev, EMAC_FTR_STACR_OC_INVERT)) + r |= EMAC_STACR_OC; + if (emac_has_feature(dev, EMAC_FTR_HAS_NEW_STACR)) + r |= EMACX_STACR_STAC_WRITE; + else + r |= EMAC_STACR_STAC_WRITE; + r |= (reg & EMAC_STACR_PRA_MASK) | + ((id & EMAC_STACR_PCDA_MASK) << EMAC_STACR_PCDA_SHIFT) | + (val << EMAC_STACR_PHYD_SHIFT); + out_be32(&p->stacr, r); + + /* Wait for write to complete */ + n = 200; + while (!emac_phy_done(dev, in_be32(&p->stacr))) { + udelay(1); + if (!--n) { + DBG2(dev, " -> timeout wait complete\n"); + goto bail; + } + } + bail: + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_put_mdio(dev->rgmii_dev, dev->rgmii_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_put_mdio(dev->zmii_dev, dev->zmii_port); + mutex_unlock(&dev->mdio_lock); +} + +static int emac_mdio_read(struct net_device *ndev, int id, int reg) +{ + struct emac_instance *dev = netdev_priv(ndev); + int res; + + res = __emac_mdio_read((dev->mdio_instance && + dev->phy.gpcs_address != id) ? 
+ dev->mdio_instance : dev, + (u8) id, (u8) reg); + return res; +} + +static void emac_mdio_write(struct net_device *ndev, int id, int reg, int val) +{ + struct emac_instance *dev = netdev_priv(ndev); + + __emac_mdio_write((dev->mdio_instance && + dev->phy.gpcs_address != id) ? + dev->mdio_instance : dev, + (u8) id, (u8) reg, (u16) val); +} + +/* Tx lock BH */ +static void __emac_set_multicast_list(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 rmr = emac_iff2rmr(dev->ndev); + + DBG(dev, "__multicast %08x" NL, rmr); + + /* I decided to relax register access rules here to avoid + * full EMAC reset. + * + * There is a real problem with EMAC4 core if we use MWSW_001 bit + * in MR1 register and do a full EMAC reset. + * One TX BD status update is delayed and, after EMAC reset, it + * never happens, resulting in TX hung (it'll be recovered by TX + * timeout handler eventually, but this is just gross). + * So we either have to do full TX reset or try to cheat here :) + * + * The only required change is to RX mode register, so I *think* all + * we need is just to stop RX channel. This seems to work on all + * tested SoCs. --ebs + * + * If we need the full reset, we might just trigger the workqueue + * and do it async... 
a bit nasty but should work --BenH + */ + dev->mcast_pending = 0; + emac_rx_disable(dev); + if (rmr & EMAC_RMR_MAE) + emac_hash_mc(dev); + out_be32(&p->rmr, rmr); + emac_rx_enable(dev); +} + +/* Tx lock BH */ +static void emac_set_multicast_list(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + + DBG(dev, "multicast" NL); + + BUG_ON(!netif_running(dev->ndev)); + + if (dev->no_mcast) { + dev->mcast_pending = 1; + return; + } + + mutex_lock(&dev->link_lock); + __emac_set_multicast_list(dev); + mutex_unlock(&dev->link_lock); +} + +static int emac_set_mac_address(struct net_device *ndev, void *sa) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct sockaddr *addr = sa; + struct emac_regs __iomem *p = dev->emacp; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + mutex_lock(&dev->link_lock); + + eth_hw_addr_set(ndev, addr->sa_data); + + emac_rx_disable(dev); + emac_tx_disable(dev); + out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]); + out_be32(&p->ialr, (ndev->dev_addr[2] << 24) | + (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) | + ndev->dev_addr[5]); + emac_tx_enable(dev); + emac_rx_enable(dev); + + mutex_unlock(&dev->link_lock); + + return 0; +} + +static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) +{ + int rx_sync_size = emac_rx_sync_size(new_mtu); + int rx_skb_size = emac_rx_skb_size(new_mtu); + int i, ret = 0; + int mr1_jumbo_bit_change = 0; + + mutex_lock(&dev->link_lock); + emac_netif_stop(dev); + emac_rx_disable(dev); + mal_disable_rx_channel(dev->mal, dev->mal_rx_chan); + + if (dev->rx_sg_skb) { + ++dev->estats.rx_dropped_resize; + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; + } + + /* Make a first pass over RX ring and mark BDs ready, dropping + * non-processed packets on the way. We need this as a separate pass + * to simplify error recovery in the case of allocation failure later. 
+ */ + for (i = 0; i < NUM_RX_BUFF; ++i) { + if (dev->rx_desc[i].ctrl & MAL_RX_CTRL_FIRST) + ++dev->estats.rx_dropped_resize; + + dev->rx_desc[i].data_len = 0; + dev->rx_desc[i].ctrl = MAL_RX_CTRL_EMPTY | + (i == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0); + } + + /* Reallocate RX ring only if bigger skb buffers are required */ + if (rx_skb_size <= dev->rx_skb_size) + goto skip; + + /* Second pass, allocate new skbs */ + for (i = 0; i < NUM_RX_BUFF; ++i) { + struct sk_buff *skb; + + skb = netdev_alloc_skb_ip_align(dev->ndev, rx_skb_size); + if (!skb) { + ret = -ENOMEM; + goto oom; + } + + BUG_ON(!dev->rx_skb[i]); + dev_kfree_skb(dev->rx_skb[i]); + + dev->rx_desc[i].data_ptr = + dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN, + rx_sync_size, DMA_FROM_DEVICE) + + NET_IP_ALIGN; + dev->rx_skb[i] = skb; + } + skip: + /* Check if we need to change "Jumbo" bit in MR1 */ + if (emac_has_feature(dev, EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE)) { + mr1_jumbo_bit_change = (new_mtu > ETH_DATA_LEN) || + (dev->ndev->mtu > ETH_DATA_LEN); + } else { + mr1_jumbo_bit_change = (new_mtu > ETH_DATA_LEN) ^ + (dev->ndev->mtu > ETH_DATA_LEN); + } + + if (mr1_jumbo_bit_change) { + /* This is to prevent starting RX channel in emac_rx_enable() */ + set_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags); + + dev->ndev->mtu = new_mtu; + emac_full_tx_reset(dev); + } + + mal_set_rcbs(dev->mal, dev->mal_rx_chan, emac_rx_size(new_mtu)); + oom: + /* Restart RX */ + clear_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags); + dev->rx_slot = 0; + mal_enable_rx_channel(dev->mal, dev->mal_rx_chan); + emac_rx_enable(dev); + emac_netif_start(dev); + mutex_unlock(&dev->link_lock); + + return ret; +} + +/* Process ctx, rtnl_lock semaphore */ +static int emac_change_mtu(struct net_device *ndev, int new_mtu) +{ + struct emac_instance *dev = netdev_priv(ndev); + int ret = 0; + + DBG(dev, "change_mtu(%d)" NL, new_mtu); + + if (netif_running(ndev)) { + /* Check if we really need to reinitialize RX ring */ + if 
(emac_rx_skb_size(ndev->mtu) != emac_rx_skb_size(new_mtu)) + ret = emac_resize_rx_ring(dev, new_mtu); + } + + if (!ret) { + ndev->mtu = new_mtu; + dev->rx_skb_size = emac_rx_skb_size(new_mtu); + dev->rx_sync_size = emac_rx_sync_size(new_mtu); + } + + return ret; +} + +static void emac_clean_tx_ring(struct emac_instance *dev) +{ + int i; + + for (i = 0; i < NUM_TX_BUFF; ++i) { + if (dev->tx_skb[i]) { + dev_kfree_skb(dev->tx_skb[i]); + dev->tx_skb[i] = NULL; + if (dev->tx_desc[i].ctrl & MAL_TX_CTRL_READY) + ++dev->estats.tx_dropped; + } + dev->tx_desc[i].ctrl = 0; + dev->tx_desc[i].data_ptr = 0; + } +} + +static void emac_clean_rx_ring(struct emac_instance *dev) +{ + int i; + + for (i = 0; i < NUM_RX_BUFF; ++i) + if (dev->rx_skb[i]) { + dev->rx_desc[i].ctrl = 0; + dev_kfree_skb(dev->rx_skb[i]); + dev->rx_skb[i] = NULL; + dev->rx_desc[i].data_ptr = 0; + } + + if (dev->rx_sg_skb) { + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; + } +} + +static int +__emac_prepare_rx_skb(struct sk_buff *skb, struct emac_instance *dev, int slot) +{ + if (unlikely(!skb)) + return -ENOMEM; + + dev->rx_skb[slot] = skb; + dev->rx_desc[slot].data_len = 0; + + dev->rx_desc[slot].data_ptr = + dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN, + dev->rx_sync_size, DMA_FROM_DEVICE) + NET_IP_ALIGN; + wmb(); + dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY | + (slot == (NUM_RX_BUFF - 1) ? 
MAL_RX_CTRL_WRAP : 0); + + return 0; +} + +static int +emac_alloc_rx_skb(struct emac_instance *dev, int slot) +{ + struct sk_buff *skb; + + skb = __netdev_alloc_skb_ip_align(dev->ndev, dev->rx_skb_size, + GFP_KERNEL); + + return __emac_prepare_rx_skb(skb, dev, slot); +} + +static int +emac_alloc_rx_skb_napi(struct emac_instance *dev, int slot) +{ + struct sk_buff *skb; + + skb = napi_alloc_skb(&dev->mal->napi, dev->rx_skb_size); + + return __emac_prepare_rx_skb(skb, dev, slot); +} + +static void emac_print_link_status(struct emac_instance *dev) +{ + if (netif_carrier_ok(dev->ndev)) + printk(KERN_INFO "%s: link is up, %d %s%s\n", + dev->ndev->name, dev->phy.speed, + dev->phy.duplex == DUPLEX_FULL ? "FDX" : "HDX", + dev->phy.pause ? ", pause enabled" : + dev->phy.asym_pause ? ", asymmetric pause enabled" : ""); + else + printk(KERN_INFO "%s: link is down\n", dev->ndev->name); +} + +/* Process ctx, rtnl_lock semaphore */ +static int emac_open(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + int err, i; + + DBG(dev, "open" NL); + + /* Setup error IRQ handler */ + err = request_irq(dev->emac_irq, emac_irq, 0, "EMAC", dev); + if (err) { + printk(KERN_ERR "%s: failed to request IRQ %d\n", + ndev->name, dev->emac_irq); + return err; + } + + /* Allocate RX ring */ + for (i = 0; i < NUM_RX_BUFF; ++i) + if (emac_alloc_rx_skb(dev, i)) { + printk(KERN_ERR "%s: failed to allocate RX ring\n", + ndev->name); + goto oom; + } + + dev->tx_cnt = dev->tx_slot = dev->ack_slot = dev->rx_slot = 0; + clear_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags); + dev->rx_sg_skb = NULL; + + mutex_lock(&dev->link_lock); + dev->opened = 1; + + /* Start PHY polling now. 
+ */ + if (dev->phy.address >= 0) { + int link_poll_interval; + if (dev->phy.def->ops->poll_link(&dev->phy)) { + dev->phy.def->ops->read_link(&dev->phy); + emac_rx_clk_default(dev); + netif_carrier_on(dev->ndev); + link_poll_interval = PHY_POLL_LINK_ON; + } else { + emac_rx_clk_tx(dev); + netif_carrier_off(dev->ndev); + link_poll_interval = PHY_POLL_LINK_OFF; + } + dev->link_polling = 1; + wmb(); + schedule_delayed_work(&dev->link_work, link_poll_interval); + emac_print_link_status(dev); + } else + netif_carrier_on(dev->ndev); + + /* Required for Pause packet support in EMAC */ + dev_mc_add_global(ndev, default_mcast_addr); + + emac_configure(dev); + mal_poll_add(dev->mal, &dev->commac); + mal_enable_tx_channel(dev->mal, dev->mal_tx_chan); + mal_set_rcbs(dev->mal, dev->mal_rx_chan, emac_rx_size(ndev->mtu)); + mal_enable_rx_channel(dev->mal, dev->mal_rx_chan); + emac_tx_enable(dev); + emac_rx_enable(dev); + emac_netif_start(dev); + + mutex_unlock(&dev->link_lock); + + return 0; + oom: + emac_clean_rx_ring(dev); + free_irq(dev->emac_irq, dev); + + return -ENOMEM; +} + +/* BHs disabled */ +#if 0 +static int emac_link_differs(struct emac_instance *dev) +{ + u32 r = in_be32(&dev->emacp->mr1); + + int duplex = r & EMAC_MR1_FDE ? 
DUPLEX_FULL : DUPLEX_HALF; + int speed, pause, asym_pause; + + if (r & EMAC_MR1_MF_1000) + speed = SPEED_1000; + else if (r & EMAC_MR1_MF_100) + speed = SPEED_100; + else + speed = SPEED_10; + + switch (r & (EMAC_MR1_EIFC | EMAC_MR1_APP)) { + case (EMAC_MR1_EIFC | EMAC_MR1_APP): + pause = 1; + asym_pause = 0; + break; + case EMAC_MR1_APP: + pause = 0; + asym_pause = 1; + break; + default: + pause = asym_pause = 0; + } + return speed != dev->phy.speed || duplex != dev->phy.duplex || + pause != dev->phy.pause || asym_pause != dev->phy.asym_pause; +} +#endif + +static void emac_link_timer(struct work_struct *work) +{ + struct emac_instance *dev = + container_of(to_delayed_work(work), + struct emac_instance, link_work); + int link_poll_interval; + + mutex_lock(&dev->link_lock); + DBG2(dev, "link timer" NL); + + if (!dev->opened) + goto bail; + + if (dev->phy.def->ops->poll_link(&dev->phy)) { + if (!netif_carrier_ok(dev->ndev)) { + emac_rx_clk_default(dev); + /* Get new link parameters */ + dev->phy.def->ops->read_link(&dev->phy); + + netif_carrier_on(dev->ndev); + emac_netif_stop(dev); + emac_full_tx_reset(dev); + emac_netif_start(dev); + emac_print_link_status(dev); + } + link_poll_interval = PHY_POLL_LINK_ON; + } else { + if (netif_carrier_ok(dev->ndev)) { + emac_rx_clk_tx(dev); + netif_carrier_off(dev->ndev); + netif_tx_disable(dev->ndev); + emac_reinitialize(dev); + emac_print_link_status(dev); + } + link_poll_interval = PHY_POLL_LINK_OFF; + } + schedule_delayed_work(&dev->link_work, link_poll_interval); + bail: + mutex_unlock(&dev->link_lock); +} + +static void emac_force_link_update(struct emac_instance *dev) +{ + netif_carrier_off(dev->ndev); + smp_rmb(); + if (dev->link_polling) { + cancel_delayed_work_sync(&dev->link_work); + if (dev->link_polling) + schedule_delayed_work(&dev->link_work, PHY_POLL_LINK_OFF); + } +} + +/* Process ctx, rtnl_lock semaphore */ +static int emac_close(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + + 
DBG(dev, "close" NL); + + if (dev->phy.address >= 0) { + dev->link_polling = 0; + cancel_delayed_work_sync(&dev->link_work); + } + mutex_lock(&dev->link_lock); + emac_netif_stop(dev); + dev->opened = 0; + mutex_unlock(&dev->link_lock); + + emac_rx_disable(dev); + emac_tx_disable(dev); + mal_disable_rx_channel(dev->mal, dev->mal_rx_chan); + mal_disable_tx_channel(dev->mal, dev->mal_tx_chan); + mal_poll_del(dev->mal, &dev->commac); + + emac_clean_tx_ring(dev); + emac_clean_rx_ring(dev); + + free_irq(dev->emac_irq, dev); + + netif_carrier_off(ndev); + + return 0; +} + +static inline u16 emac_tx_csum(struct emac_instance *dev, + struct sk_buff *skb) +{ + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH) && + (skb->ip_summed == CHECKSUM_PARTIAL)) { + ++dev->stats.tx_packets_csum; + return EMAC_TX_CTRL_TAH_CSUM; + } + return 0; +} + +static inline netdev_tx_t emac_xmit_finish(struct emac_instance *dev, int len) +{ + struct emac_regs __iomem *p = dev->emacp; + struct net_device *ndev = dev->ndev; + + /* Send the packet out. 
If the if makes a significant perf + * difference, then we can store the TMR0 value in "dev" + * instead + */ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + out_be32(&p->tmr0, EMAC4_TMR0_XMIT); + else + out_be32(&p->tmr0, EMAC_TMR0_XMIT); + + if (unlikely(++dev->tx_cnt == NUM_TX_BUFF)) { + netif_stop_queue(ndev); + DBG2(dev, "stopped TX queue" NL); + } + + netif_trans_update(ndev); + ++dev->stats.tx_packets; + dev->stats.tx_bytes += len; + + return NETDEV_TX_OK; +} + +/* Tx lock BH */ +static netdev_tx_t emac_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + unsigned int len = skb->len; + int slot; + + u16 ctrl = EMAC_TX_CTRL_GFCS | EMAC_TX_CTRL_GP | MAL_TX_CTRL_READY | + MAL_TX_CTRL_LAST | emac_tx_csum(dev, skb); + + slot = dev->tx_slot++; + if (dev->tx_slot == NUM_TX_BUFF) { + dev->tx_slot = 0; + ctrl |= MAL_TX_CTRL_WRAP; + } + + DBG2(dev, "xmit(%u) %d" NL, len, slot); + + dev->tx_skb[slot] = skb; + dev->tx_desc[slot].data_ptr = dma_map_single(&dev->ofdev->dev, + skb->data, len, + DMA_TO_DEVICE); + dev->tx_desc[slot].data_len = (u16) len; + wmb(); + dev->tx_desc[slot].ctrl = ctrl; + + return emac_xmit_finish(dev, len); +} + +static inline int emac_xmit_split(struct emac_instance *dev, int slot, + u32 pd, int len, int last, u16 base_ctrl) +{ + while (1) { + u16 ctrl = base_ctrl; + int chunk = min(len, MAL_MAX_TX_SIZE); + len -= chunk; + + slot = (slot + 1) % NUM_TX_BUFF; + + if (last && !len) + ctrl |= MAL_TX_CTRL_LAST; + if (slot == NUM_TX_BUFF - 1) + ctrl |= MAL_TX_CTRL_WRAP; + + dev->tx_skb[slot] = NULL; + dev->tx_desc[slot].data_ptr = pd; + dev->tx_desc[slot].data_len = (u16) chunk; + dev->tx_desc[slot].ctrl = ctrl; + ++dev->tx_cnt; + + if (!len) + break; + + pd += chunk; + } + return slot; +} + +/* Tx lock BH disabled (SG version for TAH equipped EMACs) */ +static netdev_tx_t +emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + int 
nr_frags = skb_shinfo(skb)->nr_frags; + int len = skb->len, chunk; + int slot, i; + u16 ctrl; + u32 pd; + + /* This is common "fast" path */ + if (likely(!nr_frags && len <= MAL_MAX_TX_SIZE)) + return emac_start_xmit(skb, ndev); + + len -= skb->data_len; + + /* Note, this is only an *estimation*, we can still run out of empty + * slots because of the additional fragmentation into + * MAL_MAX_TX_SIZE-sized chunks + */ + if (unlikely(dev->tx_cnt + nr_frags + mal_tx_chunks(len) > NUM_TX_BUFF)) + goto stop_queue; + + ctrl = EMAC_TX_CTRL_GFCS | EMAC_TX_CTRL_GP | MAL_TX_CTRL_READY | + emac_tx_csum(dev, skb); + slot = dev->tx_slot; + + /* skb data */ + dev->tx_skb[slot] = NULL; + chunk = min(len, MAL_MAX_TX_SIZE); + dev->tx_desc[slot].data_ptr = pd = + dma_map_single(&dev->ofdev->dev, skb->data, len, DMA_TO_DEVICE); + dev->tx_desc[slot].data_len = (u16) chunk; + len -= chunk; + if (unlikely(len)) + slot = emac_xmit_split(dev, slot, pd + chunk, len, !nr_frags, + ctrl); + /* skb fragments */ + for (i = 0; i < nr_frags; ++i) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + len = skb_frag_size(frag); + + if (unlikely(dev->tx_cnt + mal_tx_chunks(len) >= NUM_TX_BUFF)) + goto undo_frame; + + pd = skb_frag_dma_map(&dev->ofdev->dev, frag, 0, len, + DMA_TO_DEVICE); + + slot = emac_xmit_split(dev, slot, pd, len, i == nr_frags - 1, + ctrl); + } + + DBG2(dev, "xmit_sg(%u) %d - %d" NL, skb->len, dev->tx_slot, slot); + + /* Attach skb to the last slot so we don't release it too early */ + dev->tx_skb[slot] = skb; + + /* Send the packet out */ + if (dev->tx_slot == NUM_TX_BUFF - 1) + ctrl |= MAL_TX_CTRL_WRAP; + wmb(); + dev->tx_desc[dev->tx_slot].ctrl = ctrl; + dev->tx_slot = (slot + 1) % NUM_TX_BUFF; + + return emac_xmit_finish(dev, skb->len); + + undo_frame: + /* Well, too bad. Our previous estimation was overly optimistic. + * Undo everything. 
+ */ + while (slot != dev->tx_slot) { + dev->tx_desc[slot].ctrl = 0; + --dev->tx_cnt; + if (--slot < 0) + slot = NUM_TX_BUFF - 1; + } + ++dev->estats.tx_undo; + + stop_queue: + netif_stop_queue(ndev); + DBG2(dev, "stopped TX queue" NL); + return NETDEV_TX_BUSY; +} + +/* Tx lock BHs */ +static void emac_parse_tx_error(struct emac_instance *dev, u16 ctrl) +{ + struct emac_error_stats *st = &dev->estats; + + DBG(dev, "BD TX error %04x" NL, ctrl); + + ++st->tx_bd_errors; + if (ctrl & EMAC_TX_ST_BFCS) + ++st->tx_bd_bad_fcs; + if (ctrl & EMAC_TX_ST_LCS) + ++st->tx_bd_carrier_loss; + if (ctrl & EMAC_TX_ST_ED) + ++st->tx_bd_excessive_deferral; + if (ctrl & EMAC_TX_ST_EC) + ++st->tx_bd_excessive_collisions; + if (ctrl & EMAC_TX_ST_LC) + ++st->tx_bd_late_collision; + if (ctrl & EMAC_TX_ST_MC) + ++st->tx_bd_multple_collisions; + if (ctrl & EMAC_TX_ST_SC) + ++st->tx_bd_single_collision; + if (ctrl & EMAC_TX_ST_UR) + ++st->tx_bd_underrun; + if (ctrl & EMAC_TX_ST_SQE) + ++st->tx_bd_sqe; +} + +static void emac_poll_tx(void *param) +{ + struct emac_instance *dev = param; + u32 bad_mask; + + DBG2(dev, "poll_tx, %d %d" NL, dev->tx_cnt, dev->ack_slot); + + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) + bad_mask = EMAC_IS_BAD_TX_TAH; + else + bad_mask = EMAC_IS_BAD_TX; + + netif_tx_lock_bh(dev->ndev); + if (dev->tx_cnt) { + u16 ctrl; + int slot = dev->ack_slot, n = 0; + again: + ctrl = dev->tx_desc[slot].ctrl; + if (!(ctrl & MAL_TX_CTRL_READY)) { + struct sk_buff *skb = dev->tx_skb[slot]; + ++n; + + if (skb) { + dev_kfree_skb(skb); + dev->tx_skb[slot] = NULL; + } + slot = (slot + 1) % NUM_TX_BUFF; + + if (unlikely(ctrl & bad_mask)) + emac_parse_tx_error(dev, ctrl); + + if (--dev->tx_cnt) + goto again; + } + if (n) { + dev->ack_slot = slot; + if (netif_queue_stopped(dev->ndev) && + dev->tx_cnt < EMAC_TX_WAKEUP_THRESH) + netif_wake_queue(dev->ndev); + + DBG2(dev, "tx %d pkts" NL, n); + } + } + netif_tx_unlock_bh(dev->ndev); +} + +static inline void emac_recycle_rx_skb(struct 
emac_instance *dev, int slot, + int len) +{ + struct sk_buff *skb = dev->rx_skb[slot]; + + DBG2(dev, "recycle %d %d" NL, slot, len); + + if (len) + dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN, + SKB_DATA_ALIGN(len + NET_IP_ALIGN), + DMA_FROM_DEVICE); + + dev->rx_desc[slot].data_len = 0; + wmb(); + dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY | + (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0); +} + +static void emac_parse_rx_error(struct emac_instance *dev, u16 ctrl) +{ + struct emac_error_stats *st = &dev->estats; + + DBG(dev, "BD RX error %04x" NL, ctrl); + + ++st->rx_bd_errors; + if (ctrl & EMAC_RX_ST_OE) + ++st->rx_bd_overrun; + if (ctrl & EMAC_RX_ST_BP) + ++st->rx_bd_bad_packet; + if (ctrl & EMAC_RX_ST_RP) + ++st->rx_bd_runt_packet; + if (ctrl & EMAC_RX_ST_SE) + ++st->rx_bd_short_event; + if (ctrl & EMAC_RX_ST_AE) + ++st->rx_bd_alignment_error; + if (ctrl & EMAC_RX_ST_BFCS) + ++st->rx_bd_bad_fcs; + if (ctrl & EMAC_RX_ST_PTL) + ++st->rx_bd_packet_too_long; + if (ctrl & EMAC_RX_ST_ORE) + ++st->rx_bd_out_of_range; + if (ctrl & EMAC_RX_ST_IRE) + ++st->rx_bd_in_range; +} + +static inline void emac_rx_csum(struct emac_instance *dev, + struct sk_buff *skb, u16 ctrl) +{ +#ifdef CONFIG_IBM_EMAC_TAH + if (!ctrl && dev->tah_dev) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + ++dev->stats.rx_packets_csum; + } +#endif +} + +static inline int emac_rx_sg_append(struct emac_instance *dev, int slot) +{ + if (likely(dev->rx_sg_skb != NULL)) { + int len = dev->rx_desc[slot].data_len; + int tot_len = dev->rx_sg_skb->len + len; + + if (unlikely(tot_len + NET_IP_ALIGN > dev->rx_skb_size)) { + ++dev->estats.rx_dropped_mtu; + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; + } else { + memcpy(skb_tail_pointer(dev->rx_sg_skb), + dev->rx_skb[slot]->data, len); + skb_put(dev->rx_sg_skb, len); + emac_recycle_rx_skb(dev, slot, len); + return 0; + } + } + emac_recycle_rx_skb(dev, slot, 0); + return -1; +} + +/* NAPI poll context */ +static int emac_poll_rx(void 
*param, int budget) +{ + struct emac_instance *dev = param; + int slot = dev->rx_slot, received = 0; + + DBG2(dev, "poll_rx(%d)" NL, budget); + + again: + while (budget > 0) { + int len; + struct sk_buff *skb; + u16 ctrl = dev->rx_desc[slot].ctrl; + + if (ctrl & MAL_RX_CTRL_EMPTY) + break; + + skb = dev->rx_skb[slot]; + mb(); + len = dev->rx_desc[slot].data_len; + + if (unlikely(!MAL_IS_SINGLE_RX(ctrl))) + goto sg; + + ctrl &= EMAC_BAD_RX_MASK; + if (unlikely(ctrl && ctrl != EMAC_RX_TAH_BAD_CSUM)) { + emac_parse_rx_error(dev, ctrl); + ++dev->estats.rx_dropped_error; + emac_recycle_rx_skb(dev, slot, 0); + len = 0; + goto next; + } + + if (len < ETH_HLEN) { + ++dev->estats.rx_dropped_stack; + emac_recycle_rx_skb(dev, slot, len); + goto next; + } + + if (len && len < EMAC_RX_COPY_THRESH) { + struct sk_buff *copy_skb; + + copy_skb = napi_alloc_skb(&dev->mal->napi, len); + if (unlikely(!copy_skb)) + goto oom; + + memcpy(copy_skb->data - NET_IP_ALIGN, + skb->data - NET_IP_ALIGN, + len + NET_IP_ALIGN); + emac_recycle_rx_skb(dev, slot, len); + skb = copy_skb; + } else if (unlikely(emac_alloc_rx_skb_napi(dev, slot))) + goto oom; + + skb_put(skb, len); + push_packet: + skb->protocol = eth_type_trans(skb, dev->ndev); + emac_rx_csum(dev, skb, ctrl); + + if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) + ++dev->estats.rx_dropped_stack; + next: + ++dev->stats.rx_packets; + skip: + dev->stats.rx_bytes += len; + slot = (slot + 1) % NUM_RX_BUFF; + --budget; + ++received; + continue; + sg: + if (ctrl & MAL_RX_CTRL_FIRST) { + BUG_ON(dev->rx_sg_skb); + if (unlikely(emac_alloc_rx_skb_napi(dev, slot))) { + DBG(dev, "rx OOM %d" NL, slot); + ++dev->estats.rx_dropped_oom; + emac_recycle_rx_skb(dev, slot, 0); + } else { + dev->rx_sg_skb = skb; + skb_put(skb, len); + } + } else if (!emac_rx_sg_append(dev, slot) && + (ctrl & MAL_RX_CTRL_LAST)) { + + skb = dev->rx_sg_skb; + dev->rx_sg_skb = NULL; + + ctrl &= EMAC_BAD_RX_MASK; + if (unlikely(ctrl && ctrl != EMAC_RX_TAH_BAD_CSUM)) { + 
emac_parse_rx_error(dev, ctrl); + ++dev->estats.rx_dropped_error; + dev_kfree_skb(skb); + len = 0; + } else + goto push_packet; + } + goto skip; + oom: + DBG(dev, "rx OOM %d" NL, slot); + /* Drop the packet and recycle skb */ + ++dev->estats.rx_dropped_oom; + emac_recycle_rx_skb(dev, slot, 0); + goto next; + } + + if (received) { + DBG2(dev, "rx %d BDs" NL, received); + dev->rx_slot = slot; + } + + if (unlikely(budget && test_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags))) { + mb(); + if (!(dev->rx_desc[slot].ctrl & MAL_RX_CTRL_EMPTY)) { + DBG2(dev, "rx restart" NL); + received = 0; + goto again; + } + + if (dev->rx_sg_skb) { + DBG2(dev, "dropping partial rx packet" NL); + ++dev->estats.rx_dropped_error; + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; + } + + clear_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags); + mal_enable_rx_channel(dev->mal, dev->mal_rx_chan); + emac_rx_enable(dev); + dev->rx_slot = 0; + } + return received; +} + +/* NAPI poll context */ +static int emac_peek_rx(void *param) +{ + struct emac_instance *dev = param; + + return !(dev->rx_desc[dev->rx_slot].ctrl & MAL_RX_CTRL_EMPTY); +} + +/* NAPI poll context */ +static int emac_peek_rx_sg(void *param) +{ + struct emac_instance *dev = param; + + int slot = dev->rx_slot; + while (1) { + u16 ctrl = dev->rx_desc[slot].ctrl; + if (ctrl & MAL_RX_CTRL_EMPTY) + return 0; + else if (ctrl & MAL_RX_CTRL_LAST) + return 1; + + slot = (slot + 1) % NUM_RX_BUFF; + + /* I'm just being paranoid here :) */ + if (unlikely(slot == dev->rx_slot)) + return 0; + } +} + +/* Hard IRQ */ +static void emac_rxde(void *param) +{ + struct emac_instance *dev = param; + + ++dev->estats.rx_stopped; + emac_rx_disable_async(dev); +} + +/* Hard IRQ */ +static irqreturn_t emac_irq(int irq, void *dev_instance) +{ + struct emac_instance *dev = dev_instance; + struct emac_regs __iomem *p = dev->emacp; + struct emac_error_stats *st = &dev->estats; + u32 isr; + + spin_lock(&dev->lock); + + isr = in_be32(&p->isr); + 
out_be32(&p->isr, isr); + + DBG(dev, "isr = %08x" NL, isr); + + if (isr & EMAC4_ISR_TXPE) + ++st->tx_parity; + if (isr & EMAC4_ISR_RXPE) + ++st->rx_parity; + if (isr & EMAC4_ISR_TXUE) + ++st->tx_underrun; + if (isr & EMAC4_ISR_RXOE) + ++st->rx_fifo_overrun; + if (isr & EMAC_ISR_OVR) + ++st->rx_overrun; + if (isr & EMAC_ISR_BP) + ++st->rx_bad_packet; + if (isr & EMAC_ISR_RP) + ++st->rx_runt_packet; + if (isr & EMAC_ISR_SE) + ++st->rx_short_event; + if (isr & EMAC_ISR_ALE) + ++st->rx_alignment_error; + if (isr & EMAC_ISR_BFCS) + ++st->rx_bad_fcs; + if (isr & EMAC_ISR_PTLE) + ++st->rx_packet_too_long; + if (isr & EMAC_ISR_ORE) + ++st->rx_out_of_range; + if (isr & EMAC_ISR_IRE) + ++st->rx_in_range; + if (isr & EMAC_ISR_SQE) + ++st->tx_sqe; + if (isr & EMAC_ISR_TE) + ++st->tx_errors; + + spin_unlock(&dev->lock); + + return IRQ_HANDLED; +} + +static struct net_device_stats *emac_stats(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct emac_stats *st = &dev->stats; + struct emac_error_stats *est = &dev->estats; + struct net_device_stats *nst = &ndev->stats; + unsigned long flags; + + DBG2(dev, "stats" NL); + + /* Compute "legacy" statistics */ + spin_lock_irqsave(&dev->lock, flags); + nst->rx_packets = (unsigned long)st->rx_packets; + nst->rx_bytes = (unsigned long)st->rx_bytes; + nst->tx_packets = (unsigned long)st->tx_packets; + nst->tx_bytes = (unsigned long)st->tx_bytes; + nst->rx_dropped = (unsigned long)(est->rx_dropped_oom + + est->rx_dropped_error + + est->rx_dropped_resize + + est->rx_dropped_mtu); + nst->tx_dropped = (unsigned long)est->tx_dropped; + + nst->rx_errors = (unsigned long)est->rx_bd_errors; + nst->rx_fifo_errors = (unsigned long)(est->rx_bd_overrun + + est->rx_fifo_overrun + + est->rx_overrun); + nst->rx_frame_errors = (unsigned long)(est->rx_bd_alignment_error + + est->rx_alignment_error); + nst->rx_crc_errors = (unsigned long)(est->rx_bd_bad_fcs + + est->rx_bad_fcs); + nst->rx_length_errors = (unsigned 
long)(est->rx_bd_runt_packet + + est->rx_bd_short_event + + est->rx_bd_packet_too_long + + est->rx_bd_out_of_range + + est->rx_bd_in_range + + est->rx_runt_packet + + est->rx_short_event + + est->rx_packet_too_long + + est->rx_out_of_range + + est->rx_in_range); + + nst->tx_errors = (unsigned long)(est->tx_bd_errors + est->tx_errors); + nst->tx_fifo_errors = (unsigned long)(est->tx_bd_underrun + + est->tx_underrun); + nst->tx_carrier_errors = (unsigned long)est->tx_bd_carrier_loss; + nst->collisions = (unsigned long)(est->tx_bd_excessive_deferral + + est->tx_bd_excessive_collisions + + est->tx_bd_late_collision + + est->tx_bd_multple_collisions); + spin_unlock_irqrestore(&dev->lock, flags); + return nst; +} + +static struct mal_commac_ops emac_commac_ops = { + .poll_tx = &emac_poll_tx, + .poll_rx = &emac_poll_rx, + .peek_rx = &emac_peek_rx, + .rxde = &emac_rxde, +}; + +static struct mal_commac_ops emac_commac_sg_ops = { + .poll_tx = &emac_poll_tx, + .poll_rx = &emac_poll_rx, + .peek_rx = &emac_peek_rx_sg, + .rxde = &emac_rxde, +}; + +/* Ethtool support */ +static int emac_ethtool_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) +{ + struct emac_instance *dev = netdev_priv(ndev); + u32 supported, advertising; + + supported = dev->phy.features; + cmd->base.port = PORT_MII; + cmd->base.phy_address = dev->phy.address; + + mutex_lock(&dev->link_lock); + advertising = dev->phy.advertising; + cmd->base.autoneg = dev->phy.autoneg; + cmd->base.speed = dev->phy.speed; + cmd->base.duplex = dev->phy.duplex; + mutex_unlock(&dev->link_lock); + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + + return 0; +} + +static int +emac_ethtool_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) +{ + struct emac_instance *dev = netdev_priv(ndev); + u32 f = dev->phy.features; + u32 advertising; + + 
ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + + DBG(dev, "set_settings(%d, %d, %d, 0x%08x)" NL, + cmd->base.autoneg, cmd->base.speed, cmd->base.duplex, advertising); + + /* Basic sanity checks */ + if (dev->phy.address < 0) + return -EOPNOTSUPP; + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) + return -EINVAL; + if (cmd->base.autoneg == AUTONEG_ENABLE && advertising == 0) + return -EINVAL; + if (cmd->base.duplex != DUPLEX_HALF && cmd->base.duplex != DUPLEX_FULL) + return -EINVAL; + + if (cmd->base.autoneg == AUTONEG_DISABLE) { + switch (cmd->base.speed) { + case SPEED_10: + if (cmd->base.duplex == DUPLEX_HALF && + !(f & SUPPORTED_10baseT_Half)) + return -EINVAL; + if (cmd->base.duplex == DUPLEX_FULL && + !(f & SUPPORTED_10baseT_Full)) + return -EINVAL; + break; + case SPEED_100: + if (cmd->base.duplex == DUPLEX_HALF && + !(f & SUPPORTED_100baseT_Half)) + return -EINVAL; + if (cmd->base.duplex == DUPLEX_FULL && + !(f & SUPPORTED_100baseT_Full)) + return -EINVAL; + break; + case SPEED_1000: + if (cmd->base.duplex == DUPLEX_HALF && + !(f & SUPPORTED_1000baseT_Half)) + return -EINVAL; + if (cmd->base.duplex == DUPLEX_FULL && + !(f & SUPPORTED_1000baseT_Full)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + mutex_lock(&dev->link_lock); + dev->phy.def->ops->setup_forced(&dev->phy, cmd->base.speed, + cmd->base.duplex); + mutex_unlock(&dev->link_lock); + + } else { + if (!(f & SUPPORTED_Autoneg)) + return -EINVAL; + + mutex_lock(&dev->link_lock); + dev->phy.def->ops->setup_aneg(&dev->phy, + (advertising & f) | + (dev->phy.advertising & + (ADVERTISED_Pause | + ADVERTISED_Asym_Pause))); + mutex_unlock(&dev->link_lock); + } + emac_force_link_update(dev); + + return 0; +} + +static void +emac_ethtool_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *rp, + struct kernel_ethtool_ringparam *kernel_rp, + struct netlink_ext_ack *extack) +{ + rp->rx_max_pending = 
rp->rx_pending = NUM_RX_BUFF; + rp->tx_max_pending = rp->tx_pending = NUM_TX_BUFF; +} + +static void emac_ethtool_get_pauseparam(struct net_device *ndev, + struct ethtool_pauseparam *pp) +{ + struct emac_instance *dev = netdev_priv(ndev); + + mutex_lock(&dev->link_lock); + if ((dev->phy.features & SUPPORTED_Autoneg) && + (dev->phy.advertising & (ADVERTISED_Pause | ADVERTISED_Asym_Pause))) + pp->autoneg = 1; + + if (dev->phy.duplex == DUPLEX_FULL) { + if (dev->phy.pause) + pp->rx_pause = pp->tx_pause = 1; + else if (dev->phy.asym_pause) + pp->tx_pause = 1; + } + mutex_unlock(&dev->link_lock); +} + +static int emac_get_regs_len(struct emac_instance *dev) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct emac_regs); +} + +static int emac_ethtool_get_regs_len(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + int size; + + size = sizeof(struct emac_ethtool_regs_hdr) + + emac_get_regs_len(dev) + mal_get_regs_len(dev->mal); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + size += zmii_get_regs_len(dev->zmii_dev); + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + size += rgmii_get_regs_len(dev->rgmii_dev); + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) + size += tah_get_regs_len(dev->tah_dev); + + return size; +} + +static void *emac_dump_regs(struct emac_instance *dev, void *buf) +{ + struct emac_ethtool_regs_subhdr *hdr = buf; + + hdr->index = dev->cell_index; + if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) { + hdr->version = EMAC4SYNC_ETHTOOL_REGS_VER; + } else if (emac_has_feature(dev, EMAC_FTR_EMAC4)) { + hdr->version = EMAC4_ETHTOOL_REGS_VER; + } else { + hdr->version = EMAC_ETHTOOL_REGS_VER; + } + memcpy_fromio(hdr + 1, dev->emacp, sizeof(struct emac_regs)); + return (void *)(hdr + 1) + sizeof(struct emac_regs); +} + +static void emac_ethtool_get_regs(struct net_device *ndev, + struct ethtool_regs *regs, void *buf) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct emac_ethtool_regs_hdr *hdr = buf; + + 
hdr->components = 0; + buf = hdr + 1; + + buf = mal_dump_regs(dev->mal, buf); + buf = emac_dump_regs(dev, buf); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) { + hdr->components |= EMAC_ETHTOOL_REGS_ZMII; + buf = zmii_dump_regs(dev->zmii_dev, buf); + } + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) { + hdr->components |= EMAC_ETHTOOL_REGS_RGMII; + buf = rgmii_dump_regs(dev->rgmii_dev, buf); + } + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) { + hdr->components |= EMAC_ETHTOOL_REGS_TAH; + buf = tah_dump_regs(dev->tah_dev, buf); + } +} + +static int emac_ethtool_nway_reset(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + int res = 0; + + DBG(dev, "nway_reset" NL); + + if (dev->phy.address < 0) + return -EOPNOTSUPP; + + mutex_lock(&dev->link_lock); + if (!dev->phy.autoneg) { + res = -EINVAL; + goto out; + } + + dev->phy.def->ops->setup_aneg(&dev->phy, dev->phy.advertising); + out: + mutex_unlock(&dev->link_lock); + emac_force_link_update(dev); + return res; +} + +static int emac_ethtool_get_sset_count(struct net_device *ndev, int stringset) +{ + if (stringset == ETH_SS_STATS) + return EMAC_ETHTOOL_STATS_COUNT; + else + return -EINVAL; +} + +static void emac_ethtool_get_strings(struct net_device *ndev, u32 stringset, + u8 * buf) +{ + if (stringset == ETH_SS_STATS) + memcpy(buf, &emac_stats_keys, sizeof(emac_stats_keys)); +} + +static void emac_ethtool_get_ethtool_stats(struct net_device *ndev, + struct ethtool_stats *estats, + u64 * tmp_stats) +{ + struct emac_instance *dev = netdev_priv(ndev); + + memcpy(tmp_stats, &dev->stats, sizeof(dev->stats)); + tmp_stats += sizeof(dev->stats) / sizeof(u64); + memcpy(tmp_stats, &dev->estats, sizeof(dev->estats)); +} + +static void emac_ethtool_get_drvinfo(struct net_device *ndev, + struct ethtool_drvinfo *info) +{ + struct emac_instance *dev = netdev_priv(ndev); + + strscpy(info->driver, "ibm_emac", sizeof(info->driver)); + strscpy(info->version, DRV_VERSION, sizeof(info->version)); + 
snprintf(info->bus_info, sizeof(info->bus_info), "PPC 4xx EMAC-%d %pOF", + dev->cell_index, dev->ofdev->dev.of_node); +} + +static const struct ethtool_ops emac_ethtool_ops = { + .get_drvinfo = emac_ethtool_get_drvinfo, + + .get_regs_len = emac_ethtool_get_regs_len, + .get_regs = emac_ethtool_get_regs, + + .nway_reset = emac_ethtool_nway_reset, + + .get_ringparam = emac_ethtool_get_ringparam, + .get_pauseparam = emac_ethtool_get_pauseparam, + + .get_strings = emac_ethtool_get_strings, + .get_sset_count = emac_ethtool_get_sset_count, + .get_ethtool_stats = emac_ethtool_get_ethtool_stats, + + .get_link = ethtool_op_get_link, + .get_link_ksettings = emac_ethtool_get_link_ksettings, + .set_link_ksettings = emac_ethtool_set_link_ksettings, +}; + +static int emac_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct mii_ioctl_data *data = if_mii(rq); + + DBG(dev, "ioctl %08x" NL, cmd); + + if (dev->phy.address < 0) + return -EOPNOTSUPP; + + switch (cmd) { + case SIOCGMIIPHY: + data->phy_id = dev->phy.address; + fallthrough; + case SIOCGMIIREG: + data->val_out = emac_mdio_read(ndev, dev->phy.address, + data->reg_num); + return 0; + + case SIOCSMIIREG: + emac_mdio_write(ndev, dev->phy.address, data->reg_num, + data->val_in); + return 0; + default: + return -EOPNOTSUPP; + } +} + +struct emac_depentry { + u32 phandle; + struct device_node *node; + struct platform_device *ofdev; + void *drvdata; +}; + +#define EMAC_DEP_MAL_IDX 0 +#define EMAC_DEP_ZMII_IDX 1 +#define EMAC_DEP_RGMII_IDX 2 +#define EMAC_DEP_TAH_IDX 3 +#define EMAC_DEP_MDIO_IDX 4 +#define EMAC_DEP_PREV_IDX 5 +#define EMAC_DEP_COUNT 6 + +static int emac_check_deps(struct emac_instance *dev, + struct emac_depentry *deps) +{ + int i, there = 0; + struct device_node *np; + + for (i = 0; i < EMAC_DEP_COUNT; i++) { + /* no dependency on that item, all right */ + if (deps[i].phandle == 0) { + there++; + continue; + } + /* special case for blist as the 
dependency might go away */ + if (i == EMAC_DEP_PREV_IDX) { + np = *(dev->blist - 1); + if (np == NULL) { + deps[i].phandle = 0; + there++; + continue; + } + if (deps[i].node == NULL) + deps[i].node = of_node_get(np); + } + if (deps[i].node == NULL) + deps[i].node = of_find_node_by_phandle(deps[i].phandle); + if (deps[i].node == NULL) + continue; + if (deps[i].ofdev == NULL) + deps[i].ofdev = of_find_device_by_node(deps[i].node); + if (deps[i].ofdev == NULL) + continue; + if (deps[i].drvdata == NULL) + deps[i].drvdata = platform_get_drvdata(deps[i].ofdev); + if (deps[i].drvdata != NULL) + there++; + } + return there == EMAC_DEP_COUNT; +} + +static void emac_put_deps(struct emac_instance *dev) +{ + platform_device_put(dev->mal_dev); + platform_device_put(dev->zmii_dev); + platform_device_put(dev->rgmii_dev); + platform_device_put(dev->mdio_dev); + platform_device_put(dev->tah_dev); +} + +static int emac_of_bus_notify(struct notifier_block *nb, unsigned long action, + void *data) +{ + /* We are only interested in a driver being bound to a device */ + if (action == BUS_NOTIFY_BOUND_DRIVER) + wake_up_all(&emac_probe_wait); + return 0; +} + +static struct notifier_block emac_of_bus_notifier = { + .notifier_call = emac_of_bus_notify +}; + +static int emac_wait_deps(struct emac_instance *dev) +{ + struct emac_depentry deps[EMAC_DEP_COUNT]; + int i, err; + + memset(&deps, 0, sizeof(deps)); + + deps[EMAC_DEP_MAL_IDX].phandle = dev->mal_ph; + deps[EMAC_DEP_ZMII_IDX].phandle = dev->zmii_ph; + deps[EMAC_DEP_RGMII_IDX].phandle = dev->rgmii_ph; + if (dev->tah_ph) + deps[EMAC_DEP_TAH_IDX].phandle = dev->tah_ph; + if (dev->mdio_ph) + deps[EMAC_DEP_MDIO_IDX].phandle = dev->mdio_ph; + if (dev->blist && dev->blist > emac_boot_list) + deps[EMAC_DEP_PREV_IDX].phandle = 0xffffffffu; + bus_register_notifier(&platform_bus_type, &emac_of_bus_notifier); + wait_event_timeout(emac_probe_wait, + emac_check_deps(dev, deps), + EMAC_PROBE_DEP_TIMEOUT); + bus_unregister_notifier(&platform_bus_type, 
&emac_of_bus_notifier); + err = emac_check_deps(dev, deps) ? 0 : -ENODEV; + for (i = 0; i < EMAC_DEP_COUNT; i++) { + of_node_put(deps[i].node); + if (err) + platform_device_put(deps[i].ofdev); + } + if (err == 0) { + dev->mal_dev = deps[EMAC_DEP_MAL_IDX].ofdev; + dev->zmii_dev = deps[EMAC_DEP_ZMII_IDX].ofdev; + dev->rgmii_dev = deps[EMAC_DEP_RGMII_IDX].ofdev; + dev->tah_dev = deps[EMAC_DEP_TAH_IDX].ofdev; + dev->mdio_dev = deps[EMAC_DEP_MDIO_IDX].ofdev; + } + platform_device_put(deps[EMAC_DEP_PREV_IDX].ofdev); + return err; +} + +static int emac_read_uint_prop(struct device_node *np, const char *name, + u32 *val, int fatal) +{ + int len; + const u32 *prop = of_get_property(np, name, &len); + if (prop == NULL || len < sizeof(u32)) { + if (fatal) + printk(KERN_ERR "%pOF: missing %s property\n", + np, name); + return -ENODEV; + } + *val = *prop; + return 0; +} + +static void emac_adjust_link(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct phy_device *phy = dev->phy_dev; + + dev->phy.autoneg = phy->autoneg; + dev->phy.speed = phy->speed; + dev->phy.duplex = phy->duplex; + dev->phy.pause = phy->pause; + dev->phy.asym_pause = phy->asym_pause; + ethtool_convert_link_mode_to_legacy_u32(&dev->phy.advertising, + phy->advertising); +} + +static int emac_mii_bus_read(struct mii_bus *bus, int addr, int regnum) +{ + int ret = emac_mdio_read(bus->priv, addr, regnum); + /* This is a workaround for powered down ports/phys. + * In the wild, this was seen on the Cisco Meraki MX60(W). + * This hardware disables ports as part of the handoff + * procedure. Accessing the ports will lead to errors + * (-ETIMEDOUT, -EREMOTEIO) that do more harm than good. + */ + return ret < 0 ? 
0xffff : ret; +} + +static int emac_mii_bus_write(struct mii_bus *bus, int addr, + int regnum, u16 val) +{ + emac_mdio_write(bus->priv, addr, regnum, val); + return 0; +} + +static int emac_mii_bus_reset(struct mii_bus *bus) +{ + struct emac_instance *dev = netdev_priv(bus->priv); + + return emac_reset(dev); +} + +static int emac_mdio_phy_start_aneg(struct mii_phy *phy, + struct phy_device *phy_dev) +{ + phy_dev->autoneg = phy->autoneg; + phy_dev->speed = phy->speed; + phy_dev->duplex = phy->duplex; + ethtool_convert_legacy_u32_to_link_mode(phy_dev->advertising, + phy->advertising); + return phy_start_aneg(phy_dev); +} + +static int emac_mdio_setup_aneg(struct mii_phy *phy, u32 advertise) +{ + struct net_device *ndev = phy->dev; + struct emac_instance *dev = netdev_priv(ndev); + + phy->autoneg = AUTONEG_ENABLE; + phy->advertising = advertise; + return emac_mdio_phy_start_aneg(phy, dev->phy_dev); +} + +static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd) +{ + struct net_device *ndev = phy->dev; + struct emac_instance *dev = netdev_priv(ndev); + + phy->autoneg = AUTONEG_DISABLE; + phy->speed = speed; + phy->duplex = fd; + return emac_mdio_phy_start_aneg(phy, dev->phy_dev); +} + +static int emac_mdio_poll_link(struct mii_phy *phy) +{ + struct net_device *ndev = phy->dev; + struct emac_instance *dev = netdev_priv(ndev); + int res; + + res = phy_read_status(dev->phy_dev); + if (res) { + dev_err(&dev->ofdev->dev, "link update failed (%d).", res); + return ethtool_op_get_link(ndev); + } + + return dev->phy_dev->link; +} + +static int emac_mdio_read_link(struct mii_phy *phy) +{ + struct net_device *ndev = phy->dev; + struct emac_instance *dev = netdev_priv(ndev); + struct phy_device *phy_dev = dev->phy_dev; + int res; + + res = phy_read_status(phy_dev); + if (res) + return res; + + phy->speed = phy_dev->speed; + phy->duplex = phy_dev->duplex; + phy->pause = phy_dev->pause; + phy->asym_pause = phy_dev->asym_pause; + return 0; +} + +static int 
emac_mdio_init_phy(struct mii_phy *phy) +{ + struct net_device *ndev = phy->dev; + struct emac_instance *dev = netdev_priv(ndev); + + phy_start(dev->phy_dev); + return phy_init_hw(dev->phy_dev); +} + +static const struct mii_phy_ops emac_dt_mdio_phy_ops = { + .init = emac_mdio_init_phy, + .setup_aneg = emac_mdio_setup_aneg, + .setup_forced = emac_mdio_setup_forced, + .poll_link = emac_mdio_poll_link, + .read_link = emac_mdio_read_link, +}; + +static int emac_dt_mdio_probe(struct emac_instance *dev) +{ + struct device_node *mii_np; + int res; + + mii_np = of_get_child_by_name(dev->ofdev->dev.of_node, "mdio"); + if (!mii_np) { + dev_err(&dev->ofdev->dev, "no mdio definition found."); + return -ENODEV; + } + + if (!of_device_is_available(mii_np)) { + res = -ENODEV; + goto put_node; + } + + dev->mii_bus = devm_mdiobus_alloc(&dev->ofdev->dev); + if (!dev->mii_bus) { + res = -ENOMEM; + goto put_node; + } + + dev->mii_bus->priv = dev->ndev; + dev->mii_bus->parent = dev->ndev->dev.parent; + dev->mii_bus->name = "emac_mdio"; + dev->mii_bus->read = &emac_mii_bus_read; + dev->mii_bus->write = &emac_mii_bus_write; + dev->mii_bus->reset = &emac_mii_bus_reset; + snprintf(dev->mii_bus->id, MII_BUS_ID_SIZE, "%s", dev->ofdev->name); + res = of_mdiobus_register(dev->mii_bus, mii_np); + if (res) { + dev_err(&dev->ofdev->dev, "cannot register MDIO bus %s (%d)", + dev->mii_bus->name, res); + } + + put_node: + of_node_put(mii_np); + return res; +} + +static int emac_dt_phy_connect(struct emac_instance *dev, + struct device_node *phy_handle) +{ + dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def), + GFP_KERNEL); + if (!dev->phy.def) + return -ENOMEM; + + dev->phy_dev = of_phy_connect(dev->ndev, phy_handle, &emac_adjust_link, + 0, dev->phy_mode); + if (!dev->phy_dev) { + dev_err(&dev->ofdev->dev, "failed to connect to PHY.\n"); + return -ENODEV; + } + + dev->phy.def->phy_id = dev->phy_dev->drv->phy_id; + dev->phy.def->phy_id_mask = dev->phy_dev->drv->phy_id_mask; + 
dev->phy.def->name = dev->phy_dev->drv->name; + dev->phy.def->ops = &emac_dt_mdio_phy_ops; + ethtool_convert_link_mode_to_legacy_u32(&dev->phy.features, + dev->phy_dev->supported); + dev->phy.address = dev->phy_dev->mdio.addr; + dev->phy.mode = dev->phy_dev->interface; + return 0; +} + +static int emac_dt_phy_probe(struct emac_instance *dev) +{ + struct device_node *np = dev->ofdev->dev.of_node; + struct device_node *phy_handle; + int res = 1; + + phy_handle = of_parse_phandle(np, "phy-handle", 0); + + if (phy_handle) { + res = emac_dt_mdio_probe(dev); + if (!res) { + res = emac_dt_phy_connect(dev, phy_handle); + if (res) + mdiobus_unregister(dev->mii_bus); + } + } + + of_node_put(phy_handle); + return res; +} + +static int emac_init_phy(struct emac_instance *dev) +{ + struct device_node *np = dev->ofdev->dev.of_node; + struct net_device *ndev = dev->ndev; + u32 phy_map, adv; + int i; + + dev->phy.dev = ndev; + dev->phy.mode = dev->phy_mode; + + /* PHY-less configuration. */ + if ((dev->phy_address == 0xffffffff && dev->phy_map == 0xffffffff) || + of_phy_is_fixed_link(np)) { + emac_reset(dev); + + /* PHY-less configuration. */ + dev->phy.address = -1; + dev->phy.features = SUPPORTED_MII; + if (emac_phy_supports_gige(dev->phy_mode)) + dev->phy.features |= SUPPORTED_1000baseT_Full; + else + dev->phy.features |= SUPPORTED_100baseT_Full; + dev->phy.pause = 1; + + if (of_phy_is_fixed_link(np)) { + int res = emac_dt_mdio_probe(dev); + + if (res) + return res; + + res = of_phy_register_fixed_link(np); + dev->phy_dev = of_phy_find_device(np); + if (res || !dev->phy_dev) { + mdiobus_unregister(dev->mii_bus); + return res ? 
res : -EINVAL; + } + emac_adjust_link(dev->ndev); + put_device(&dev->phy_dev->mdio.dev); + } + return 0; + } + + mutex_lock(&emac_phy_map_lock); + phy_map = dev->phy_map | busy_phy_map; + + DBG(dev, "PHY maps %08x %08x" NL, dev->phy_map, busy_phy_map); + + dev->phy.mdio_read = emac_mdio_read; + dev->phy.mdio_write = emac_mdio_write; + + /* Enable internal clock source */ +#ifdef CONFIG_PPC_DCR_NATIVE + if (emac_has_feature(dev, EMAC_FTR_440GX_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_MFR, 0, SDR0_MFR_ECS); +#endif + /* PHY clock workaround */ + emac_rx_clk_tx(dev); + + /* Enable internal clock source on 440GX*/ +#ifdef CONFIG_PPC_DCR_NATIVE + if (emac_has_feature(dev, EMAC_FTR_440GX_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_MFR, 0, SDR0_MFR_ECS); +#endif + /* Configure EMAC with defaults so we can at least use MDIO + * This is needed mostly for 440GX + */ + if (emac_phy_gpcs(dev->phy.mode)) { + /* XXX + * Make GPCS PHY address equal to EMAC index. + * We probably should take into account busy_phy_map + * and/or phy_map here. + * + * Note that the busy_phy_map is currently global + * while it should probably be per-ASIC... + */ + dev->phy.gpcs_address = dev->gpcs_address; + if (dev->phy.gpcs_address == 0xffffffff) + dev->phy.address = dev->cell_index; + } + + emac_configure(dev); + + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) { + int res = emac_dt_phy_probe(dev); + + switch (res) { + case 1: + /* No phy-handle property configured. + * Continue with the existing phy probe + * and setup code. 
+ */ + break; + + case 0: + mutex_unlock(&emac_phy_map_lock); + goto init_phy; + + default: + mutex_unlock(&emac_phy_map_lock); + dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n", + res); + return res; + } + } + + if (dev->phy_address != 0xffffffff) + phy_map = ~(1 << dev->phy_address); + + for (i = 0; i < 0x20; phy_map >>= 1, ++i) + if (!(phy_map & 1)) { + int r; + busy_phy_map |= 1 << i; + + /* Quick check if there is a PHY at the address */ + r = emac_mdio_read(dev->ndev, i, MII_BMCR); + if (r == 0xffff || r < 0) + continue; + if (!emac_mii_phy_probe(&dev->phy, i)) + break; + } + + /* Enable external clock source */ +#ifdef CONFIG_PPC_DCR_NATIVE + if (emac_has_feature(dev, EMAC_FTR_440GX_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_MFR, SDR0_MFR_ECS, 0); +#endif + mutex_unlock(&emac_phy_map_lock); + if (i == 0x20) { + printk(KERN_WARNING "%pOF: can't find PHY!\n", np); + return -ENXIO; + } + + init_phy: + /* Init PHY */ + if (dev->phy.def->ops->init) + dev->phy.def->ops->init(&dev->phy); + + /* Disable any PHY features not supported by the platform */ + dev->phy.def->features &= ~dev->phy_feat_exc; + dev->phy.features &= ~dev->phy_feat_exc; + + /* Setup initial link parameters */ + if (dev->phy.features & SUPPORTED_Autoneg) { + adv = dev->phy.features; + if (!emac_has_feature(dev, EMAC_FTR_NO_FLOW_CONTROL_40x)) + adv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; + /* Restart autonegotiation */ + dev->phy.def->ops->setup_aneg(&dev->phy, adv); + } else { + u32 f = dev->phy.def->features; + int speed = SPEED_10, fd = DUPLEX_HALF; + + /* Select highest supported speed/duplex */ + if (f & SUPPORTED_1000baseT_Full) { + speed = SPEED_1000; + fd = DUPLEX_FULL; + } else if (f & SUPPORTED_1000baseT_Half) + speed = SPEED_1000; + else if (f & SUPPORTED_100baseT_Full) { + speed = SPEED_100; + fd = DUPLEX_FULL; + } else if (f & SUPPORTED_100baseT_Half) + speed = SPEED_100; + else if (f & SUPPORTED_10baseT_Full) + fd = DUPLEX_FULL; + + /* Force link parameters */ + 
dev->phy.def->ops->setup_forced(&dev->phy, speed, fd); + } + return 0; +} + +static int emac_init_config(struct emac_instance *dev) +{ + struct device_node *np = dev->ofdev->dev.of_node; + int err; + + /* Read config from device-tree */ + if (emac_read_uint_prop(np, "mal-device", &dev->mal_ph, 1)) + return -ENXIO; + if (emac_read_uint_prop(np, "mal-tx-channel", &dev->mal_tx_chan, 1)) + return -ENXIO; + if (emac_read_uint_prop(np, "mal-rx-channel", &dev->mal_rx_chan, 1)) + return -ENXIO; + if (emac_read_uint_prop(np, "cell-index", &dev->cell_index, 1)) + return -ENXIO; + if (emac_read_uint_prop(np, "max-frame-size", &dev->max_mtu, 0)) + dev->max_mtu = ETH_DATA_LEN; + if (emac_read_uint_prop(np, "rx-fifo-size", &dev->rx_fifo_size, 0)) + dev->rx_fifo_size = 2048; + if (emac_read_uint_prop(np, "tx-fifo-size", &dev->tx_fifo_size, 0)) + dev->tx_fifo_size = 2048; + if (emac_read_uint_prop(np, "rx-fifo-size-gige", &dev->rx_fifo_size_gige, 0)) + dev->rx_fifo_size_gige = dev->rx_fifo_size; + if (emac_read_uint_prop(np, "tx-fifo-size-gige", &dev->tx_fifo_size_gige, 0)) + dev->tx_fifo_size_gige = dev->tx_fifo_size; + if (emac_read_uint_prop(np, "phy-address", &dev->phy_address, 0)) + dev->phy_address = 0xffffffff; + if (emac_read_uint_prop(np, "phy-map", &dev->phy_map, 0)) + dev->phy_map = 0xffffffff; + if (emac_read_uint_prop(np, "gpcs-address", &dev->gpcs_address, 0)) + dev->gpcs_address = 0xffffffff; + if (emac_read_uint_prop(np->parent, "clock-frequency", &dev->opb_bus_freq, 1)) + return -ENXIO; + if (emac_read_uint_prop(np, "tah-device", &dev->tah_ph, 0)) + dev->tah_ph = 0; + if (emac_read_uint_prop(np, "tah-channel", &dev->tah_port, 0)) + dev->tah_port = 0; + if (emac_read_uint_prop(np, "mdio-device", &dev->mdio_ph, 0)) + dev->mdio_ph = 0; + if (emac_read_uint_prop(np, "zmii-device", &dev->zmii_ph, 0)) + dev->zmii_ph = 0; + if (emac_read_uint_prop(np, "zmii-channel", &dev->zmii_port, 0)) + dev->zmii_port = 0xffffffff; + if (emac_read_uint_prop(np, "rgmii-device", 
&dev->rgmii_ph, 0)) + dev->rgmii_ph = 0; + if (emac_read_uint_prop(np, "rgmii-channel", &dev->rgmii_port, 0)) + dev->rgmii_port = 0xffffffff; + if (emac_read_uint_prop(np, "fifo-entry-size", &dev->fifo_entry_size, 0)) + dev->fifo_entry_size = 16; + if (emac_read_uint_prop(np, "mal-burst-size", &dev->mal_burst_size, 0)) + dev->mal_burst_size = 256; + + /* PHY mode needs some decoding */ + err = of_get_phy_mode(np, &dev->phy_mode); + if (err) + dev->phy_mode = PHY_INTERFACE_MODE_NA; + + /* Check EMAC version */ + if (of_device_is_compatible(np, "ibm,emac4sync")) { + dev->features |= (EMAC_FTR_EMAC4 | EMAC_FTR_EMAC4SYNC); + if (of_device_is_compatible(np, "ibm,emac-460ex") || + of_device_is_compatible(np, "ibm,emac-460gt")) + dev->features |= EMAC_FTR_460EX_PHY_CLK_FIX; + if (of_device_is_compatible(np, "ibm,emac-405ex") || + of_device_is_compatible(np, "ibm,emac-405exr")) + dev->features |= EMAC_FTR_440EP_PHY_CLK_FIX; + if (of_device_is_compatible(np, "ibm,emac-apm821xx")) { + dev->features |= (EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE | + EMAC_FTR_APM821XX_NO_HALF_DUPLEX | + EMAC_FTR_460EX_PHY_CLK_FIX); + } + } else if (of_device_is_compatible(np, "ibm,emac4")) { + dev->features |= EMAC_FTR_EMAC4; + if (of_device_is_compatible(np, "ibm,emac-440gx")) + dev->features |= EMAC_FTR_440GX_PHY_CLK_FIX; + } else { + if (of_device_is_compatible(np, "ibm,emac-440ep") || + of_device_is_compatible(np, "ibm,emac-440gr")) + dev->features |= EMAC_FTR_440EP_PHY_CLK_FIX; + if (of_device_is_compatible(np, "ibm,emac-405ez")) { +#ifdef CONFIG_IBM_EMAC_NO_FLOW_CTRL + dev->features |= EMAC_FTR_NO_FLOW_CONTROL_40x; +#else + printk(KERN_ERR "%pOF: Flow control not disabled!\n", + np); + return -ENXIO; +#endif + } + + } + + /* Fixup some feature bits based on the device tree */ + if (of_get_property(np, "has-inverted-stacr-oc", NULL)) + dev->features |= EMAC_FTR_STACR_OC_INVERT; + if (of_get_property(np, "has-new-stacr-staopc", NULL)) + dev->features |= EMAC_FTR_HAS_NEW_STACR; + + /* CAB lacks the 
appropriate properties */ + if (of_device_is_compatible(np, "ibm,emac-axon")) + dev->features |= EMAC_FTR_HAS_NEW_STACR | + EMAC_FTR_STACR_OC_INVERT; + + /* Enable TAH/ZMII/RGMII features as found */ + if (dev->tah_ph != 0) { +#ifdef CONFIG_IBM_EMAC_TAH + dev->features |= EMAC_FTR_HAS_TAH; +#else + printk(KERN_ERR "%pOF: TAH support not enabled !\n", np); + return -ENXIO; +#endif + } + + if (dev->zmii_ph != 0) { +#ifdef CONFIG_IBM_EMAC_ZMII + dev->features |= EMAC_FTR_HAS_ZMII; +#else + printk(KERN_ERR "%pOF: ZMII support not enabled !\n", np); + return -ENXIO; +#endif + } + + if (dev->rgmii_ph != 0) { +#ifdef CONFIG_IBM_EMAC_RGMII + dev->features |= EMAC_FTR_HAS_RGMII; +#else + printk(KERN_ERR "%pOF: RGMII support not enabled !\n", np); + return -ENXIO; +#endif + } + + /* Read MAC-address */ + err = of_get_ethdev_address(np, dev->ndev); + if (err) + return dev_err_probe(&dev->ofdev->dev, err, + "Can't get valid [local-]mac-address from OF !\n"); + + /* IAHT and GAHT filter parameterization */ + if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) { + dev->xaht_slots_shift = EMAC4SYNC_XAHT_SLOTS_SHIFT; + dev->xaht_width_shift = EMAC4SYNC_XAHT_WIDTH_SHIFT; + } else { + dev->xaht_slots_shift = EMAC4_XAHT_SLOTS_SHIFT; + dev->xaht_width_shift = EMAC4_XAHT_WIDTH_SHIFT; + } + + /* This should never happen */ + if (WARN_ON(EMAC_XAHT_REGS(dev) > EMAC_XAHT_MAX_REGS)) + return -ENXIO; + + DBG(dev, "features : 0x%08x / 0x%08x\n", dev->features, EMAC_FTRS_POSSIBLE); + DBG(dev, "tx_fifo_size : %d (%d gige)\n", dev->tx_fifo_size, dev->tx_fifo_size_gige); + DBG(dev, "rx_fifo_size : %d (%d gige)\n", dev->rx_fifo_size, dev->rx_fifo_size_gige); + DBG(dev, "max_mtu : %d\n", dev->max_mtu); + DBG(dev, "OPB freq : %d\n", dev->opb_bus_freq); + + return 0; +} + +static const struct net_device_ops emac_netdev_ops = { + .ndo_open = emac_open, + .ndo_stop = emac_close, + .ndo_get_stats = emac_stats, + .ndo_set_rx_mode = emac_set_multicast_list, + .ndo_eth_ioctl = emac_ioctl, + .ndo_tx_timeout = 
emac_tx_timeout, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = emac_set_mac_address, + .ndo_start_xmit = emac_start_xmit, +}; + +static const struct net_device_ops emac_gige_netdev_ops = { + .ndo_open = emac_open, + .ndo_stop = emac_close, + .ndo_get_stats = emac_stats, + .ndo_set_rx_mode = emac_set_multicast_list, + .ndo_eth_ioctl = emac_ioctl, + .ndo_tx_timeout = emac_tx_timeout, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = emac_set_mac_address, + .ndo_start_xmit = emac_start_xmit_sg, + .ndo_change_mtu = emac_change_mtu, +}; + +static int emac_probe(struct platform_device *ofdev) +{ + struct net_device *ndev; + struct emac_instance *dev; + struct device_node *np = ofdev->dev.of_node; + struct device_node **blist = NULL; + int err, i; + + /* Skip unused/unwired EMACS. We leave the check for an unused + * property here for now, but new flat device trees should set a + * status property to "disabled" instead. + */ + if (of_get_property(np, "unused", NULL) || !of_device_is_available(np)) + return -ENODEV; + + /* Find ourselves in the bootlist if we are there */ + for (i = 0; i < EMAC_BOOT_LIST_SIZE; i++) + if (emac_boot_list[i] == np) + blist = &emac_boot_list[i]; + + /* Allocate our net_device structure */ + err = -ENOMEM; + ndev = alloc_etherdev(sizeof(struct emac_instance)); + if (!ndev) + goto err_gone; + + dev = netdev_priv(ndev); + dev->ndev = ndev; + dev->ofdev = ofdev; + dev->blist = blist; + SET_NETDEV_DEV(ndev, &ofdev->dev); + + /* Initialize some embedded data structures */ + mutex_init(&dev->mdio_lock); + mutex_init(&dev->link_lock); + spin_lock_init(&dev->lock); + INIT_WORK(&dev->reset_work, emac_reset_work); + + /* Init various config data based on device-tree */ + err = emac_init_config(dev); + if (err) + goto err_free; + + /* Get interrupts. 
EMAC irq is mandatory, WOL irq is optional */ + dev->emac_irq = irq_of_parse_and_map(np, 0); + dev->wol_irq = irq_of_parse_and_map(np, 1); + if (!dev->emac_irq) { + printk(KERN_ERR "%pOF: Can't map main interrupt\n", np); + err = -ENODEV; + goto err_free; + } + ndev->irq = dev->emac_irq; + + /* Map EMAC regs */ + // TODO : platform_get_resource() and devm_ioremap_resource() + dev->emacp = of_iomap(np, 0); + if (dev->emacp == NULL) { + printk(KERN_ERR "%pOF: Can't map device registers!\n", np); + err = -ENOMEM; + goto err_irq_unmap; + } + + /* Wait for dependent devices */ + err = emac_wait_deps(dev); + if (err) { + printk(KERN_ERR + "%pOF: Timeout waiting for dependent devices\n", np); + /* display more info about what's missing ? */ + goto err_reg_unmap; + } + dev->mal = platform_get_drvdata(dev->mal_dev); + if (dev->mdio_dev != NULL) + dev->mdio_instance = platform_get_drvdata(dev->mdio_dev); + + /* Register with MAL */ + dev->commac.ops = &emac_commac_ops; + dev->commac.dev = dev; + dev->commac.tx_chan_mask = MAL_CHAN_MASK(dev->mal_tx_chan); + dev->commac.rx_chan_mask = MAL_CHAN_MASK(dev->mal_rx_chan); + err = mal_register_commac(dev->mal, &dev->commac); + if (err) { + printk(KERN_ERR "%pOF: failed to register with mal %pOF!\n", + np, dev->mal_dev->dev.of_node); + goto err_rel_deps; + } + dev->rx_skb_size = emac_rx_skb_size(ndev->mtu); + dev->rx_sync_size = emac_rx_sync_size(ndev->mtu); + + /* Get pointers to BD rings */ + dev->tx_desc = + dev->mal->bd_virt + mal_tx_bd_offset(dev->mal, dev->mal_tx_chan); + dev->rx_desc = + dev->mal->bd_virt + mal_rx_bd_offset(dev->mal, dev->mal_rx_chan); + + DBG(dev, "tx_desc %p" NL, dev->tx_desc); + DBG(dev, "rx_desc %p" NL, dev->rx_desc); + + /* Clean rings */ + memset(dev->tx_desc, 0, NUM_TX_BUFF * sizeof(struct mal_descriptor)); + memset(dev->rx_desc, 0, NUM_RX_BUFF * sizeof(struct mal_descriptor)); + memset(dev->tx_skb, 0, NUM_TX_BUFF * sizeof(struct sk_buff *)); + memset(dev->rx_skb, 0, NUM_RX_BUFF * sizeof(struct sk_buff 
*)); + + /* Attach to ZMII, if needed */ + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII) && + (err = zmii_attach(dev->zmii_dev, dev->zmii_port, &dev->phy_mode)) != 0) + goto err_unreg_commac; + + /* Attach to RGMII, if needed */ + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII) && + (err = rgmii_attach(dev->rgmii_dev, dev->rgmii_port, dev->phy_mode)) != 0) + goto err_detach_zmii; + + /* Attach to TAH, if needed */ + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH) && + (err = tah_attach(dev->tah_dev, dev->tah_port)) != 0) + goto err_detach_rgmii; + + /* Set some link defaults before we can find out real parameters */ + dev->phy.speed = SPEED_100; + dev->phy.duplex = DUPLEX_FULL; + dev->phy.autoneg = AUTONEG_DISABLE; + dev->phy.pause = dev->phy.asym_pause = 0; + dev->stop_timeout = STOP_TIMEOUT_100; + INIT_DELAYED_WORK(&dev->link_work, emac_link_timer); + + /* Some SoCs like APM821xx does not support Half Duplex mode. */ + if (emac_has_feature(dev, EMAC_FTR_APM821XX_NO_HALF_DUPLEX)) { + dev->phy_feat_exc = (SUPPORTED_1000baseT_Half | + SUPPORTED_100baseT_Half | + SUPPORTED_10baseT_Half); + } + + /* Find PHY if any */ + err = emac_init_phy(dev); + if (err != 0) + goto err_detach_tah; + + if (dev->tah_dev) { + ndev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG; + ndev->features |= ndev->hw_features | NETIF_F_RXCSUM; + } + ndev->watchdog_timeo = 5 * HZ; + if (emac_phy_supports_gige(dev->phy_mode)) { + ndev->netdev_ops = &emac_gige_netdev_ops; + dev->commac.ops = &emac_commac_sg_ops; + } else + ndev->netdev_ops = &emac_netdev_ops; + ndev->ethtool_ops = &emac_ethtool_ops; + + /* MTU range: 46 - 1500 or whatever is in OF */ + ndev->min_mtu = EMAC_MIN_MTU; + ndev->max_mtu = dev->max_mtu; + + netif_carrier_off(ndev); + + err = register_netdev(ndev); + if (err) { + printk(KERN_ERR "%pOF: failed to register net device (%d)!\n", + np, err); + goto err_detach_tah; + } + + /* Set our drvdata last as we don't want them visible until we are + * fully initialized + */ + wmb(); + 
platform_set_drvdata(ofdev, dev); + + /* There's a new kid in town ! Let's tell everybody */ + wake_up_all(&emac_probe_wait); + + + printk(KERN_INFO "%s: EMAC-%d %pOF, MAC %pM\n", + ndev->name, dev->cell_index, np, ndev->dev_addr); + + if (dev->phy_mode == PHY_INTERFACE_MODE_SGMII) + printk(KERN_NOTICE "%s: in SGMII mode\n", ndev->name); + + if (dev->phy.address >= 0) + printk("%s: found %s PHY (0x%02x)\n", ndev->name, + dev->phy.def->name, dev->phy.address); + + /* Life is good */ + return 0; + + /* I have a bad feeling about this ... */ + + err_detach_tah: + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) + tah_detach(dev->tah_dev, dev->tah_port); + err_detach_rgmii: + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_detach(dev->rgmii_dev, dev->rgmii_port); + err_detach_zmii: + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_detach(dev->zmii_dev, dev->zmii_port); + err_unreg_commac: + mal_unregister_commac(dev->mal, &dev->commac); + err_rel_deps: + emac_put_deps(dev); + err_reg_unmap: + iounmap(dev->emacp); + err_irq_unmap: + if (dev->wol_irq) + irq_dispose_mapping(dev->wol_irq); + if (dev->emac_irq) + irq_dispose_mapping(dev->emac_irq); + err_free: + free_netdev(ndev); + err_gone: + /* if we were on the bootlist, remove us as we won't show up and + * wake up all waiters to notify them in case they were waiting + * on us + */ + if (blist) { + *blist = NULL; + wake_up_all(&emac_probe_wait); + } + return err; +} + +static int emac_remove(struct platform_device *ofdev) +{ + struct emac_instance *dev = platform_get_drvdata(ofdev); + + DBG(dev, "remove" NL); + + unregister_netdev(dev->ndev); + + cancel_work_sync(&dev->reset_work); + + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) + tah_detach(dev->tah_dev, dev->tah_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_detach(dev->rgmii_dev, dev->rgmii_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_detach(dev->zmii_dev, dev->zmii_port); + + if (dev->phy_dev) + 
phy_disconnect(dev->phy_dev); + + if (dev->mii_bus) + mdiobus_unregister(dev->mii_bus); + + busy_phy_map &= ~(1 << dev->phy.address); + DBG(dev, "busy_phy_map now %#x" NL, busy_phy_map); + + mal_unregister_commac(dev->mal, &dev->commac); + emac_put_deps(dev); + + iounmap(dev->emacp); + + if (dev->wol_irq) + irq_dispose_mapping(dev->wol_irq); + if (dev->emac_irq) + irq_dispose_mapping(dev->emac_irq); + + free_netdev(dev->ndev); + + return 0; +} + +/* XXX Features in here should be replaced by properties... */ +static const struct of_device_id emac_match[] = +{ + { + .type = "network", + .compatible = "ibm,emac", + }, + { + .type = "network", + .compatible = "ibm,emac4", + }, + { + .type = "network", + .compatible = "ibm,emac4sync", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, emac_match); + +static struct platform_driver emac_driver = { + .driver = { + .name = "emac", + .of_match_table = emac_match, + }, + .probe = emac_probe, + .remove = emac_remove, +}; + +static void __init emac_make_bootlist(void) +{ + struct device_node *np = NULL; + int j, max, i = 0; + int cell_indices[EMAC_BOOT_LIST_SIZE]; + + /* Collect EMACs */ + while((np = of_find_all_nodes(np)) != NULL) { + const u32 *idx; + + if (of_match_node(emac_match, np) == NULL) + continue; + if (of_get_property(np, "unused", NULL)) + continue; + idx = of_get_property(np, "cell-index", NULL); + if (idx == NULL) + continue; + cell_indices[i] = *idx; + emac_boot_list[i++] = of_node_get(np); + if (i >= EMAC_BOOT_LIST_SIZE) { + of_node_put(np); + break; + } + } + max = i; + + /* Bubble sort them (doh, what a creative algorithm :-) */ + for (i = 0; max > 1 && (i < (max - 1)); i++) + for (j = i; j < max; j++) { + if (cell_indices[i] > cell_indices[j]) { + swap(emac_boot_list[i], emac_boot_list[j]); + swap(cell_indices[i], cell_indices[j]); + } + } +} + +static int __init emac_init(void) +{ + int rc; + + printk(KERN_INFO DRV_DESC ", version " DRV_VERSION "\n"); + + /* Build EMAC boot list */ + emac_make_bootlist(); + + /* 
Init submodules */ + rc = mal_init(); + if (rc) + goto err; + rc = zmii_init(); + if (rc) + goto err_mal; + rc = rgmii_init(); + if (rc) + goto err_zmii; + rc = tah_init(); + if (rc) + goto err_rgmii; + rc = platform_driver_register(&emac_driver); + if (rc) + goto err_tah; + + return 0; + + err_tah: + tah_exit(); + err_rgmii: + rgmii_exit(); + err_zmii: + zmii_exit(); + err_mal: + mal_exit(); + err: + return rc; +} + +static void __exit emac_exit(void) +{ + int i; + + platform_driver_unregister(&emac_driver); + + tah_exit(); + rgmii_exit(); + zmii_exit(); + mal_exit(); + + /* Destroy EMAC boot list */ + for (i = 0; i < EMAC_BOOT_LIST_SIZE; i++) + of_node_put(emac_boot_list[i]); +} + +module_init(emac_init); +module_exit(emac_exit); diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h new file mode 100644 index 000000000..89a1b0fea --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -0,0 +1,463 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * drivers/net/ethernet/ibm/emac/core.h + * + * Driver for PowerPC 4xx on-chip ethernet controller. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Armin Kuster <akuster@mvista.com> + * Johnnie Peters <jpeters@mvista.com> + * Copyright 2000, 2001 MontaVista Softare Inc. 
+ */ +#ifndef __IBM_NEWEMAC_CORE_H +#define __IBM_NEWEMAC_CORE_H + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/netdevice.h> +#include <linux/dma-mapping.h> +#include <linux/spinlock.h> +#include <linux/of_platform.h> +#include <linux/slab.h> + +#include <asm/io.h> +#include <asm/dcr.h> + +#include "emac.h" +#include "phy.h" +#include "zmii.h" +#include "rgmii.h" +#include "mal.h" +#include "tah.h" +#include "debug.h" + +#define NUM_TX_BUFF CONFIG_IBM_EMAC_TXB +#define NUM_RX_BUFF CONFIG_IBM_EMAC_RXB + +/* Simple sanity check */ +#if NUM_TX_BUFF > 256 || NUM_RX_BUFF > 256 +#error Invalid number of buffer descriptors (greater than 256) +#endif + +#define EMAC_MIN_MTU 46 + +/* Maximum L2 header length (VLAN tagged, no FCS) */ +#define EMAC_MTU_OVERHEAD (6 * 2 + 2 + 4) + +/* RX BD size for the given MTU */ +static inline int emac_rx_size(int mtu) +{ + if (mtu > ETH_DATA_LEN) + return MAL_MAX_RX_SIZE; + else + return mal_rx_size(ETH_DATA_LEN + EMAC_MTU_OVERHEAD); +} + +/* Size of RX skb for the given MTU */ +static inline int emac_rx_skb_size(int mtu) +{ + int size = max(mtu + EMAC_MTU_OVERHEAD, emac_rx_size(mtu)); + + return SKB_DATA_ALIGN(size + NET_IP_ALIGN) + NET_SKB_PAD; +} + +/* RX DMA sync size */ +static inline int emac_rx_sync_size(int mtu) +{ + return SKB_DATA_ALIGN(emac_rx_size(mtu) + NET_IP_ALIGN); +} + +/* Driver statistics is split into two parts to make it more cache friendly: + * - normal statistics (packet count, etc) + * - error statistics + * + * When statistics is requested by ethtool, these parts are concatenated, + * normal one goes first. + * + * Please, keep these structures in sync with emac_stats_keys. 
+ */ + +/* Normal TX/RX Statistics */ +struct emac_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 rx_packets_csum; + u64 tx_packets_csum; +}; + +/* Error statistics */ +struct emac_error_stats { + u64 tx_undo; + + /* Software RX Errors */ + u64 rx_dropped_stack; + u64 rx_dropped_oom; + u64 rx_dropped_error; + u64 rx_dropped_resize; + u64 rx_dropped_mtu; + u64 rx_stopped; + /* BD reported RX errors */ + u64 rx_bd_errors; + u64 rx_bd_overrun; + u64 rx_bd_bad_packet; + u64 rx_bd_runt_packet; + u64 rx_bd_short_event; + u64 rx_bd_alignment_error; + u64 rx_bd_bad_fcs; + u64 rx_bd_packet_too_long; + u64 rx_bd_out_of_range; + u64 rx_bd_in_range; + /* EMAC IRQ reported RX errors */ + u64 rx_parity; + u64 rx_fifo_overrun; + u64 rx_overrun; + u64 rx_bad_packet; + u64 rx_runt_packet; + u64 rx_short_event; + u64 rx_alignment_error; + u64 rx_bad_fcs; + u64 rx_packet_too_long; + u64 rx_out_of_range; + u64 rx_in_range; + + /* Software TX Errors */ + u64 tx_dropped; + /* BD reported TX errors */ + u64 tx_bd_errors; + u64 tx_bd_bad_fcs; + u64 tx_bd_carrier_loss; + u64 tx_bd_excessive_deferral; + u64 tx_bd_excessive_collisions; + u64 tx_bd_late_collision; + u64 tx_bd_multple_collisions; + u64 tx_bd_single_collision; + u64 tx_bd_underrun; + u64 tx_bd_sqe; + /* EMAC IRQ reported TX errors */ + u64 tx_parity; + u64 tx_underrun; + u64 tx_sqe; + u64 tx_errors; +}; + +#define EMAC_ETHTOOL_STATS_COUNT ((sizeof(struct emac_stats) + \ + sizeof(struct emac_error_stats)) \ + / sizeof(u64)) + +struct emac_instance { + struct net_device *ndev; + struct emac_regs __iomem *emacp; + struct platform_device *ofdev; + struct device_node **blist; /* bootlist entry */ + + /* MAL linkage */ + u32 mal_ph; + struct platform_device *mal_dev; + u32 mal_rx_chan; + u32 mal_tx_chan; + struct mal_instance *mal; + struct mal_commac commac; + + /* PHY infos */ + phy_interface_t phy_mode; + u32 phy_map; + u32 phy_address; + u32 phy_feat_exc; + struct mii_phy phy; + struct mutex 
link_lock; + struct delayed_work link_work; + int link_polling; + + /* GPCS PHY infos */ + u32 gpcs_address; + + /* Shared MDIO if any */ + u32 mdio_ph; + struct platform_device *mdio_dev; + struct emac_instance *mdio_instance; + struct mutex mdio_lock; + + /* Device-tree based phy configuration */ + struct mii_bus *mii_bus; + struct phy_device *phy_dev; + + /* ZMII infos if any */ + u32 zmii_ph; + u32 zmii_port; + struct platform_device *zmii_dev; + + /* RGMII infos if any */ + u32 rgmii_ph; + u32 rgmii_port; + struct platform_device *rgmii_dev; + + /* TAH infos if any */ + u32 tah_ph; + u32 tah_port; + struct platform_device *tah_dev; + + /* IRQs */ + int wol_irq; + int emac_irq; + + /* OPB bus frequency in Mhz */ + u32 opb_bus_freq; + + /* Cell index within an ASIC (for clk mgmnt) */ + u32 cell_index; + + /* Max supported MTU */ + u32 max_mtu; + + /* Feature bits (from probe table) */ + unsigned int features; + + /* Tx and Rx fifo sizes & other infos in bytes */ + u32 tx_fifo_size; + u32 tx_fifo_size_gige; + u32 rx_fifo_size; + u32 rx_fifo_size_gige; + u32 fifo_entry_size; + u32 mal_burst_size; /* move to MAL ? 
*/ + + /* IAHT and GAHT filter parameterization */ + u32 xaht_slots_shift; + u32 xaht_width_shift; + + /* Descriptor management + */ + struct mal_descriptor *tx_desc; + int tx_cnt; + int tx_slot; + int ack_slot; + + struct mal_descriptor *rx_desc; + int rx_slot; + struct sk_buff *rx_sg_skb; /* 1 */ + int rx_skb_size; + int rx_sync_size; + + struct sk_buff *tx_skb[NUM_TX_BUFF]; + struct sk_buff *rx_skb[NUM_RX_BUFF]; + + /* Stats + */ + struct emac_error_stats estats; + struct emac_stats stats; + + /* Misc + */ + int reset_failed; + int stop_timeout; /* in us */ + int no_mcast; + int mcast_pending; + int opened; + struct work_struct reset_work; + spinlock_t lock; +}; + +/* + * Features of various EMAC implementations + */ + +/* + * No flow control on 40x according to the original driver + */ +#define EMAC_FTR_NO_FLOW_CONTROL_40x 0x00000001 +/* + * Cell is an EMAC4 + */ +#define EMAC_FTR_EMAC4 0x00000002 +/* + * For the 440SPe, AMCC inexplicably changed the polarity of + * the "operation complete" bit in the MII control register. + */ +#define EMAC_FTR_STACR_OC_INVERT 0x00000004 +/* + * Set if we have a TAH. + */ +#define EMAC_FTR_HAS_TAH 0x00000008 +/* + * Set if we have a ZMII. + */ +#define EMAC_FTR_HAS_ZMII 0x00000010 +/* + * Set if we have a RGMII. 
+ */ +#define EMAC_FTR_HAS_RGMII 0x00000020 +/* + * Set if we have new type STACR with STAOPC + */ +#define EMAC_FTR_HAS_NEW_STACR 0x00000040 +/* + * Set if we need phy clock workaround for 440gx + */ +#define EMAC_FTR_440GX_PHY_CLK_FIX 0x00000080 +/* + * Set if we need phy clock workaround for 440ep or 440gr + */ +#define EMAC_FTR_440EP_PHY_CLK_FIX 0x00000100 +/* + * The 405EX and 460EX contain the EMAC4SYNC core + */ +#define EMAC_FTR_EMAC4SYNC 0x00000200 +/* + * Set if we need phy clock workaround for 460ex or 460gt + */ +#define EMAC_FTR_460EX_PHY_CLK_FIX 0x00000400 +/* + * APM821xx requires Jumbo frame size set explicitly + */ +#define EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE 0x00000800 +/* + * APM821xx does not support Half Duplex mode + */ +#define EMAC_FTR_APM821XX_NO_HALF_DUPLEX 0x00001000 + +/* Right now, we don't quite handle the always/possible masks on the + * most optimal way as we don't have a way to say something like + * always EMAC4. Patches welcome. + */ +enum { + EMAC_FTRS_ALWAYS = 0, + + EMAC_FTRS_POSSIBLE = +#ifdef CONFIG_IBM_EMAC_EMAC4 + EMAC_FTR_EMAC4 | EMAC_FTR_EMAC4SYNC | + EMAC_FTR_HAS_NEW_STACR | + EMAC_FTR_STACR_OC_INVERT | EMAC_FTR_440GX_PHY_CLK_FIX | +#endif +#ifdef CONFIG_IBM_EMAC_TAH + EMAC_FTR_HAS_TAH | +#endif +#ifdef CONFIG_IBM_EMAC_ZMII + EMAC_FTR_HAS_ZMII | +#endif +#ifdef CONFIG_IBM_EMAC_RGMII + EMAC_FTR_HAS_RGMII | +#endif +#ifdef CONFIG_IBM_EMAC_NO_FLOW_CTRL + EMAC_FTR_NO_FLOW_CONTROL_40x | +#endif + EMAC_FTR_460EX_PHY_CLK_FIX | + EMAC_FTR_440EP_PHY_CLK_FIX | + EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE | + EMAC_FTR_APM821XX_NO_HALF_DUPLEX, +}; + +static inline int emac_has_feature(struct emac_instance *dev, + unsigned long feature) +{ + return (EMAC_FTRS_ALWAYS & feature) || + (EMAC_FTRS_POSSIBLE & dev->features & feature); +} + +/* + * Various instances of the EMAC core have varying 1) number of + * address match slots, 2) width of the registers for handling address + * match slots, 3) number of registers for handling address match + * 
slots and 4) base offset for those registers. + * + * These macros and inlines handle these differences based on + * parameters supplied by the device structure which are, in turn, + * initialized based on the "compatible" entry in the device tree. + */ + +#define EMAC4_XAHT_SLOTS_SHIFT 6 +#define EMAC4_XAHT_WIDTH_SHIFT 4 + +#define EMAC4SYNC_XAHT_SLOTS_SHIFT 8 +#define EMAC4SYNC_XAHT_WIDTH_SHIFT 5 + +/* The largest span between slots and widths above is 3 */ +#define EMAC_XAHT_MAX_REGS (1 << 3) + +#define EMAC_XAHT_SLOTS(dev) (1 << (dev)->xaht_slots_shift) +#define EMAC_XAHT_WIDTH(dev) (1 << (dev)->xaht_width_shift) +#define EMAC_XAHT_REGS(dev) (1 << ((dev)->xaht_slots_shift - \ + (dev)->xaht_width_shift)) + +#define EMAC_XAHT_CRC_TO_SLOT(dev, crc) \ + ((EMAC_XAHT_SLOTS(dev) - 1) - \ + ((crc) >> ((sizeof (u32) * BITS_PER_BYTE) - \ + (dev)->xaht_slots_shift))) + +#define EMAC_XAHT_SLOT_TO_REG(dev, slot) \ + ((slot) >> (dev)->xaht_width_shift) + +#define EMAC_XAHT_SLOT_TO_MASK(dev, slot) \ + ((u32)(1 << (EMAC_XAHT_WIDTH(dev) - 1)) >> \ + ((slot) & (u32)(EMAC_XAHT_WIDTH(dev) - 1))) + +static inline u32 *emac_xaht_base(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + int offset; + + /* The first IAHT entry always is the base of the block of + * IAHT and GAHT registers. + */ + if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) + offset = offsetof(struct emac_regs, u1.emac4sync.iaht1); + else + offset = offsetof(struct emac_regs, u0.emac4.iaht1); + + return (u32 *)((ptrdiff_t)p + offset); +} + +static inline u32 *emac_gaht_base(struct emac_instance *dev) +{ + /* GAHT registers always come after an identical number of + * IAHT registers. + */ + return emac_xaht_base(dev) + EMAC_XAHT_REGS(dev); +} + +static inline u32 *emac_iaht_base(struct emac_instance *dev) +{ + /* IAHT registers always come before an identical number of + * GAHT registers. + */ + return emac_xaht_base(dev); +} + +/* Ethtool get_regs complex data. 
+ * We want to get not just EMAC registers, but also MAL, ZMII, RGMII, TAH + * when available. + * + * Returned BLOB consists of the emac_ethtool_regs_hdr, + * MAL registers, EMAC registers and optional ZMII, RGMII, TAH registers. + * Each register component is preceded with emac_ethtool_regs_subhdr. + * Order of the optional headers follows their relative bit positions + * in emac_ethtool_regs_hdr.components + */ +#define EMAC_ETHTOOL_REGS_ZMII 0x00000001 +#define EMAC_ETHTOOL_REGS_RGMII 0x00000002 +#define EMAC_ETHTOOL_REGS_TAH 0x00000004 + +struct emac_ethtool_regs_hdr { + u32 components; +}; + +struct emac_ethtool_regs_subhdr { + u32 version; + u32 index; +}; + +#define EMAC_ETHTOOL_REGS_VER 3 +#define EMAC4_ETHTOOL_REGS_VER 4 +#define EMAC4SYNC_ETHTOOL_REGS_VER 5 + +#endif /* __IBM_NEWEMAC_CORE_H */ diff --git a/drivers/net/ethernet/ibm/emac/debug.h b/drivers/net/ethernet/ibm/emac/debug.h new file mode 100644 index 000000000..c09a46a32 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/debug.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * drivers/net/ethernet/ibm/emac/debug.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, debug print routines. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + */ +#ifndef __IBM_NEWEMAC_DEBUG_H +#define __IBM_NEWEMAC_DEBUG_H + +#include <linux/init.h> + +#include "core.h" + +#if defined(CONFIG_IBM_EMAC_DEBUG) +# define DBG_LEVEL 1 +#else +# define DBG_LEVEL 0 +#endif + +#define EMAC_DBG(d, name, fmt, arg...) \ + printk(KERN_DEBUG #name "%pOF: " fmt, d->ofdev->dev.of_node, ## arg) + +#if DBG_LEVEL > 0 +# define DBG(d,f,x...) EMAC_DBG(d, emac, f, ##x) +# define MAL_DBG(d,f,x...) EMAC_DBG(d, mal, f, ##x) +# define ZMII_DBG(d,f,x...) 
EMAC_DBG(d, zmii, f, ##x) +# define RGMII_DBG(d,f,x...) EMAC_DBG(d, rgmii, f, ##x) +# define NL "\n" +#else +# define DBG(f,x...) ((void)0) +# define MAL_DBG(d,f,x...) ((void)0) +# define ZMII_DBG(d,f,x...) ((void)0) +# define RGMII_DBG(d,f,x...) ((void)0) +#endif +#if DBG_LEVEL > 1 +# define DBG2(d,f,x...) DBG(d,f, ##x) +# define MAL_DBG2(d,f,x...) MAL_DBG(d,f, ##x) +# define ZMII_DBG2(d,f,x...) ZMII_DBG(d,f, ##x) +# define RGMII_DBG2(d,f,x...) RGMII_DBG(d,f, ##x) +#else +# define DBG2(f,x...) ((void)0) +# define MAL_DBG2(d,f,x...) ((void)0) +# define ZMII_DBG2(d,f,x...) ((void)0) +# define RGMII_DBG2(d,f,x...) ((void)0) +#endif + +#endif /* __IBM_NEWEMAC_DEBUG_H */ diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h new file mode 100644 index 000000000..09d3ac374 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/emac.h @@ -0,0 +1,298 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * drivers/net/ethernet/ibm/emac/emac.h + * + * Register definitions for PowerPC 4xx on-chip ethernet controller + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Matt Porter <mporter@kernel.crashing.org> + * Armin Kuster <akuster@mvista.com> + * Copyright 2002-2004 MontaVista Software Inc. + */ +#ifndef __IBM_NEWEMAC_H +#define __IBM_NEWEMAC_H + +#include <linux/types.h> +#include <linux/phy.h> + +/* EMAC registers Write Access rules */ +struct emac_regs { + /* Common registers across all EMAC implementations. 
*/ + u32 mr0; /* Special */ + u32 mr1; /* Reset */ + u32 tmr0; /* Special */ + u32 tmr1; /* Special */ + u32 rmr; /* Reset */ + u32 isr; /* Always */ + u32 iser; /* Reset */ + u32 iahr; /* Reset, R, T */ + u32 ialr; /* Reset, R, T */ + u32 vtpid; /* Reset, R, T */ + u32 vtci; /* Reset, R, T */ + u32 ptr; /* Reset, T */ + union { + /* Registers unique to EMAC4 implementations */ + struct { + u32 iaht1; /* Reset, R */ + u32 iaht2; /* Reset, R */ + u32 iaht3; /* Reset, R */ + u32 iaht4; /* Reset, R */ + u32 gaht1; /* Reset, R */ + u32 gaht2; /* Reset, R */ + u32 gaht3; /* Reset, R */ + u32 gaht4; /* Reset, R */ + } emac4; + /* Registers unique to EMAC4SYNC implementations */ + struct { + u32 mahr; /* Reset, R, T */ + u32 malr; /* Reset, R, T */ + u32 mmahr; /* Reset, R, T */ + u32 mmalr; /* Reset, R, T */ + u32 rsvd0[4]; + } emac4sync; + } u0; + /* Common registers across all EMAC implementations. */ + u32 lsah; + u32 lsal; + u32 ipgvr; /* Reset, T */ + u32 stacr; /* Special */ + u32 trtr; /* Special */ + u32 rwmr; /* Reset */ + u32 octx; + u32 ocrx; + union { + /* Registers unique to EMAC4 implementations */ + struct { + u32 ipcr; + } emac4; + /* Registers unique to EMAC4SYNC implementations */ + struct { + u32 rsvd1; + u32 revid; + u32 rsvd2[2]; + u32 iaht1; /* Reset, R */ + u32 iaht2; /* Reset, R */ + u32 iaht3; /* Reset, R */ + u32 iaht4; /* Reset, R */ + u32 iaht5; /* Reset, R */ + u32 iaht6; /* Reset, R */ + u32 iaht7; /* Reset, R */ + u32 iaht8; /* Reset, R */ + u32 gaht1; /* Reset, R */ + u32 gaht2; /* Reset, R */ + u32 gaht3; /* Reset, R */ + u32 gaht4; /* Reset, R */ + u32 gaht5; /* Reset, R */ + u32 gaht6; /* Reset, R */ + u32 gaht7; /* Reset, R */ + u32 gaht8; /* Reset, R */ + u32 tpc; /* Reset, T */ + } emac4sync; + } u1; +}; + +/* EMACx_MR0 */ +#define EMAC_MR0_RXI 0x80000000 +#define EMAC_MR0_TXI 0x40000000 +#define EMAC_MR0_SRST 0x20000000 +#define EMAC_MR0_TXE 0x10000000 +#define EMAC_MR0_RXE 0x08000000 +#define EMAC_MR0_WKE 0x04000000 + +/* EMACx_MR1 
*/ +#define EMAC_MR1_FDE 0x80000000 +#define EMAC_MR1_ILE 0x40000000 +#define EMAC_MR1_VLE 0x20000000 +#define EMAC_MR1_EIFC 0x10000000 +#define EMAC_MR1_APP 0x08000000 +#define EMAC_MR1_IST 0x01000000 + +#define EMAC_MR1_MF_MASK 0x00c00000 +#define EMAC_MR1_MF_10 0x00000000 +#define EMAC_MR1_MF_100 0x00400000 +#define EMAC_MR1_MF_1000 0x00800000 +#define EMAC_MR1_MF_1000GPCS 0x00c00000 +#define EMAC_MR1_MF_IPPA(id) (((id) & 0x1f) << 6) + +#define EMAC_MR1_RFS_4K 0x00300000 +#define EMAC_MR1_RFS_16K 0x00000000 +#define EMAC_MR1_TFS_2K 0x00080000 +#define EMAC_MR1_TR0_MULT 0x00008000 +#define EMAC_MR1_JPSM 0x00000000 +#define EMAC_MR1_MWSW_001 0x00000000 +#define EMAC_MR1_BASE(opb) (EMAC_MR1_TFS_2K | EMAC_MR1_TR0_MULT) + + +#define EMAC4_MR1_RFS_2K 0x00100000 +#define EMAC4_MR1_RFS_4K 0x00180000 +#define EMAC4_MR1_RFS_8K 0x00200000 +#define EMAC4_MR1_RFS_16K 0x00280000 +#define EMAC4_MR1_TFS_2K 0x00020000 +#define EMAC4_MR1_TFS_4K 0x00030000 +#define EMAC4_MR1_TFS_8K 0x00040000 +#define EMAC4_MR1_TFS_16K 0x00050000 +#define EMAC4_MR1_TR 0x00008000 +#define EMAC4_MR1_MWSW_001 0x00001000 +#define EMAC4_MR1_JPSM 0x00000800 +#define EMAC4_MR1_OBCI_MASK 0x00000038 +#define EMAC4_MR1_OBCI_50 0x00000000 +#define EMAC4_MR1_OBCI_66 0x00000008 +#define EMAC4_MR1_OBCI_83 0x00000010 +#define EMAC4_MR1_OBCI_100 0x00000018 +#define EMAC4_MR1_OBCI_100P 0x00000020 +#define EMAC4_MR1_OBCI(freq) ((freq) <= 50 ? EMAC4_MR1_OBCI_50 : \ + (freq) <= 66 ? EMAC4_MR1_OBCI_66 : \ + (freq) <= 83 ? EMAC4_MR1_OBCI_83 : \ + (freq) <= 100 ? 
EMAC4_MR1_OBCI_100 : \ + EMAC4_MR1_OBCI_100P) + +/* EMACx_TMR0 */ +#define EMAC_TMR0_GNP 0x80000000 +#define EMAC_TMR0_DEFAULT 0x00000000 +#define EMAC4_TMR0_TFAE_2_32 0x00000001 +#define EMAC4_TMR0_TFAE_4_64 0x00000002 +#define EMAC4_TMR0_TFAE_8_128 0x00000003 +#define EMAC4_TMR0_TFAE_16_256 0x00000004 +#define EMAC4_TMR0_TFAE_32_512 0x00000005 +#define EMAC4_TMR0_TFAE_64_1024 0x00000006 +#define EMAC4_TMR0_TFAE_128_2048 0x00000007 +#define EMAC4_TMR0_DEFAULT EMAC4_TMR0_TFAE_2_32 +#define EMAC_TMR0_XMIT (EMAC_TMR0_GNP | EMAC_TMR0_DEFAULT) +#define EMAC4_TMR0_XMIT (EMAC_TMR0_GNP | EMAC4_TMR0_DEFAULT) + +/* EMACx_TMR1 */ + +#define EMAC_TMR1(l,h) (((l) << 27) | (((h) & 0xff) << 16)) +#define EMAC4_TMR1(l,h) (((l) << 27) | (((h) & 0x3ff) << 14)) + +/* EMACx_RMR */ +#define EMAC_RMR_SP 0x80000000 +#define EMAC_RMR_SFCS 0x40000000 +#define EMAC_RMR_RRP 0x20000000 +#define EMAC_RMR_RFP 0x10000000 +#define EMAC_RMR_ROP 0x08000000 +#define EMAC_RMR_RPIR 0x04000000 +#define EMAC_RMR_PPP 0x02000000 +#define EMAC_RMR_PME 0x01000000 +#define EMAC_RMR_PMME 0x00800000 +#define EMAC_RMR_IAE 0x00400000 +#define EMAC_RMR_MIAE 0x00200000 +#define EMAC_RMR_BAE 0x00100000 +#define EMAC_RMR_MAE 0x00080000 +#define EMAC_RMR_BASE 0x00000000 +#define EMAC4_RMR_RFAF_2_32 0x00000001 +#define EMAC4_RMR_RFAF_4_64 0x00000002 +#define EMAC4_RMR_RFAF_8_128 0x00000003 +#define EMAC4_RMR_RFAF_16_256 0x00000004 +#define EMAC4_RMR_RFAF_32_512 0x00000005 +#define EMAC4_RMR_RFAF_64_1024 0x00000006 +#define EMAC4_RMR_RFAF_128_2048 0x00000007 +#define EMAC4_RMR_BASE EMAC4_RMR_RFAF_128_2048 +#define EMAC4_RMR_MJS_MASK 0x0001fff8 +#define EMAC4_RMR_MJS(s) (((s) << 3) & EMAC4_RMR_MJS_MASK) + +/* EMACx_ISR & EMACx_ISER */ +#define EMAC4_ISR_TXPE 0x20000000 +#define EMAC4_ISR_RXPE 0x10000000 +#define EMAC4_ISR_TXUE 0x08000000 +#define EMAC4_ISR_RXOE 0x04000000 +#define EMAC_ISR_OVR 0x02000000 +#define EMAC_ISR_PP 0x01000000 +#define EMAC_ISR_BP 0x00800000 +#define EMAC_ISR_RP 0x00400000 +#define EMAC_ISR_SE 
0x00200000 +#define EMAC_ISR_ALE 0x00100000 +#define EMAC_ISR_BFCS 0x00080000 +#define EMAC_ISR_PTLE 0x00040000 +#define EMAC_ISR_ORE 0x00020000 +#define EMAC_ISR_IRE 0x00010000 +#define EMAC_ISR_SQE 0x00000080 +#define EMAC_ISR_TE 0x00000040 +#define EMAC_ISR_MOS 0x00000002 +#define EMAC_ISR_MOF 0x00000001 + +/* EMACx_STACR */ +#define EMAC_STACR_PHYD_MASK 0xffff +#define EMAC_STACR_PHYD_SHIFT 16 +#define EMAC_STACR_OC 0x00008000 +#define EMAC_STACR_PHYE 0x00004000 +#define EMAC_STACR_STAC_MASK 0x00003000 +#define EMAC_STACR_STAC_READ 0x00001000 +#define EMAC_STACR_STAC_WRITE 0x00002000 +#define EMAC_STACR_OPBC_MASK 0x00000C00 +#define EMAC_STACR_OPBC_50 0x00000000 +#define EMAC_STACR_OPBC_66 0x00000400 +#define EMAC_STACR_OPBC_83 0x00000800 +#define EMAC_STACR_OPBC_100 0x00000C00 +#define EMAC_STACR_OPBC(freq) ((freq) <= 50 ? EMAC_STACR_OPBC_50 : \ + (freq) <= 66 ? EMAC_STACR_OPBC_66 : \ + (freq) <= 83 ? EMAC_STACR_OPBC_83 : EMAC_STACR_OPBC_100) +#define EMAC_STACR_BASE(opb) EMAC_STACR_OPBC(opb) +#define EMAC4_STACR_BASE(opb) 0x00000000 +#define EMAC_STACR_PCDA_MASK 0x1f +#define EMAC_STACR_PCDA_SHIFT 5 +#define EMAC_STACR_PRA_MASK 0x1f +#define EMACX_STACR_STAC_MASK 0x00003800 +#define EMACX_STACR_STAC_READ 0x00001000 +#define EMACX_STACR_STAC_WRITE 0x00000800 +#define EMACX_STACR_STAC_IND_ADDR 0x00002000 +#define EMACX_STACR_STAC_IND_READ 0x00003800 +#define EMACX_STACR_STAC_IND_READINC 0x00003000 +#define EMACX_STACR_STAC_IND_WRITE 0x00002800 + + +/* EMACx_TRTR */ +#define EMAC_TRTR_SHIFT_EMAC4 24 +#define EMAC_TRTR_SHIFT 27 + +/* EMAC specific TX descriptor control fields (write access) */ +#define EMAC_TX_CTRL_GFCS 0x0200 +#define EMAC_TX_CTRL_GP 0x0100 +#define EMAC_TX_CTRL_ISA 0x0080 +#define EMAC_TX_CTRL_RSA 0x0040 +#define EMAC_TX_CTRL_IVT 0x0020 +#define EMAC_TX_CTRL_RVT 0x0010 +#define EMAC_TX_CTRL_TAH_CSUM 0x000e + +/* EMAC specific TX descriptor status fields (read access) */ +#define EMAC_TX_ST_BFCS 0x0200 +#define EMAC_TX_ST_LCS 0x0080 +#define 
EMAC_TX_ST_ED 0x0040 +#define EMAC_TX_ST_EC 0x0020 +#define EMAC_TX_ST_LC 0x0010 +#define EMAC_TX_ST_MC 0x0008 +#define EMAC_TX_ST_SC 0x0004 +#define EMAC_TX_ST_UR 0x0002 +#define EMAC_TX_ST_SQE 0x0001 +#define EMAC_IS_BAD_TX (EMAC_TX_ST_LCS | EMAC_TX_ST_ED | \ + EMAC_TX_ST_EC | EMAC_TX_ST_LC | \ + EMAC_TX_ST_MC | EMAC_TX_ST_UR) +#define EMAC_IS_BAD_TX_TAH (EMAC_TX_ST_LCS | EMAC_TX_ST_ED | \ + EMAC_TX_ST_EC | EMAC_TX_ST_LC) + +/* EMAC specific RX descriptor status fields (read access) */ +#define EMAC_RX_ST_OE 0x0200 +#define EMAC_RX_ST_PP 0x0100 +#define EMAC_RX_ST_BP 0x0080 +#define EMAC_RX_ST_RP 0x0040 +#define EMAC_RX_ST_SE 0x0020 +#define EMAC_RX_ST_AE 0x0010 +#define EMAC_RX_ST_BFCS 0x0008 +#define EMAC_RX_ST_PTL 0x0004 +#define EMAC_RX_ST_ORE 0x0002 +#define EMAC_RX_ST_IRE 0x0001 +#define EMAC_RX_TAH_BAD_CSUM 0x0003 +#define EMAC_BAD_RX_MASK (EMAC_RX_ST_OE | EMAC_RX_ST_BP | \ + EMAC_RX_ST_RP | EMAC_RX_ST_SE | \ + EMAC_RX_ST_AE | EMAC_RX_ST_BFCS | \ + EMAC_RX_ST_PTL | EMAC_RX_ST_ORE | \ + EMAC_RX_ST_IRE ) +#endif /* __IBM_NEWEMAC_H */ diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c new file mode 100644 index 000000000..ff5487bbe --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -0,0 +1,782 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * drivers/net/ethernet/ibm/emac/mal.c + * + * Memory Access Layer (MAL) support + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Benjamin Herrenschmidt <benh@kernel.crashing.org>, + * David Gibson <hermes@gibson.dropbear.id.au>, + * + * Armin Kuster <akuster@mvista.com> + * Copyright 2002 MontaVista Softare Inc. 
+ */ + +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/of_irq.h> + +#include "core.h" +#include <asm/dcr-regs.h> + +static int mal_count; + +int mal_register_commac(struct mal_instance *mal, struct mal_commac *commac) +{ + unsigned long flags; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "reg(%08x, %08x)" NL, + commac->tx_chan_mask, commac->rx_chan_mask); + + /* Don't let multiple commacs claim the same channel(s) */ + if ((mal->tx_chan_mask & commac->tx_chan_mask) || + (mal->rx_chan_mask & commac->rx_chan_mask)) { + spin_unlock_irqrestore(&mal->lock, flags); + printk(KERN_WARNING "mal%d: COMMAC channels conflict!\n", + mal->index); + return -EBUSY; + } + + if (list_empty(&mal->list)) + napi_enable(&mal->napi); + mal->tx_chan_mask |= commac->tx_chan_mask; + mal->rx_chan_mask |= commac->rx_chan_mask; + list_add(&commac->list, &mal->list); + + spin_unlock_irqrestore(&mal->lock, flags); + + return 0; +} + +void mal_unregister_commac(struct mal_instance *mal, + struct mal_commac *commac) +{ + unsigned long flags; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "unreg(%08x, %08x)" NL, + commac->tx_chan_mask, commac->rx_chan_mask); + + mal->tx_chan_mask &= ~commac->tx_chan_mask; + mal->rx_chan_mask &= ~commac->rx_chan_mask; + list_del_init(&commac->list); + if (list_empty(&mal->list)) + napi_disable(&mal->napi); + + spin_unlock_irqrestore(&mal->lock, flags); +} + +int mal_set_rcbs(struct mal_instance *mal, int channel, unsigned long size) +{ + BUG_ON(channel < 0 || channel >= mal->num_rx_chans || + size > MAL_MAX_RX_SIZE); + + MAL_DBG(mal, "set_rbcs(%d, %lu)" NL, channel, size); + + if (size & 0xf) { + printk(KERN_WARNING + "mal%d: incorrect RX size %lu for the channel %d\n", + mal->index, size, channel); + return -EINVAL; + } + + set_mal_dcrn(mal, MAL_RCBS(channel), size >> 4); + return 0; +} + +int mal_tx_bd_offset(struct mal_instance *mal, int channel) +{ + BUG_ON(channel < 0 || channel >= mal->num_tx_chans); + + return 
channel * NUM_TX_BUFF; +} + +int mal_rx_bd_offset(struct mal_instance *mal, int channel) +{ + BUG_ON(channel < 0 || channel >= mal->num_rx_chans); + return mal->num_tx_chans * NUM_TX_BUFF + channel * NUM_RX_BUFF; +} + +void mal_enable_tx_channel(struct mal_instance *mal, int channel) +{ + unsigned long flags; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "enable_tx(%d)" NL, channel); + + set_mal_dcrn(mal, MAL_TXCASR, + get_mal_dcrn(mal, MAL_TXCASR) | MAL_CHAN_MASK(channel)); + + spin_unlock_irqrestore(&mal->lock, flags); +} + +void mal_disable_tx_channel(struct mal_instance *mal, int channel) +{ + set_mal_dcrn(mal, MAL_TXCARR, MAL_CHAN_MASK(channel)); + + MAL_DBG(mal, "disable_tx(%d)" NL, channel); +} + +void mal_enable_rx_channel(struct mal_instance *mal, int channel) +{ + unsigned long flags; + + /* + * On some 4xx PPC's (e.g. 460EX/GT), the rx channel is a multiple + * of 8, but enabling in MAL_RXCASR needs the divided by 8 value + * for the bitmask + */ + if (!(channel % 8)) + channel >>= 3; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "enable_rx(%d)" NL, channel); + + set_mal_dcrn(mal, MAL_RXCASR, + get_mal_dcrn(mal, MAL_RXCASR) | MAL_CHAN_MASK(channel)); + + spin_unlock_irqrestore(&mal->lock, flags); +} + +void mal_disable_rx_channel(struct mal_instance *mal, int channel) +{ + /* + * On some 4xx PPC's (e.g. 
460EX/GT), the rx channel is a multiple + * of 8, but enabling in MAL_RXCASR needs the divided by 8 value + * for the bitmask + */ + if (!(channel % 8)) + channel >>= 3; + + set_mal_dcrn(mal, MAL_RXCARR, MAL_CHAN_MASK(channel)); + + MAL_DBG(mal, "disable_rx(%d)" NL, channel); +} + +void mal_poll_add(struct mal_instance *mal, struct mal_commac *commac) +{ + unsigned long flags; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "poll_add(%p)" NL, commac); + + /* starts disabled */ + set_bit(MAL_COMMAC_POLL_DISABLED, &commac->flags); + + list_add_tail(&commac->poll_list, &mal->poll_list); + + spin_unlock_irqrestore(&mal->lock, flags); +} + +void mal_poll_del(struct mal_instance *mal, struct mal_commac *commac) +{ + unsigned long flags; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "poll_del(%p)" NL, commac); + + list_del(&commac->poll_list); + + spin_unlock_irqrestore(&mal->lock, flags); +} + +/* synchronized by mal_poll() */ +static inline void mal_enable_eob_irq(struct mal_instance *mal) +{ + MAL_DBG2(mal, "enable_irq" NL); + + // XXX might want to cache MAL_CFG as the DCR read can be slooooow + set_mal_dcrn(mal, MAL_CFG, get_mal_dcrn(mal, MAL_CFG) | MAL_CFG_EOPIE); +} + +/* synchronized by NAPI state */ +static inline void mal_disable_eob_irq(struct mal_instance *mal) +{ + // XXX might want to cache MAL_CFG as the DCR read can be slooooow + set_mal_dcrn(mal, MAL_CFG, get_mal_dcrn(mal, MAL_CFG) & ~MAL_CFG_EOPIE); + + MAL_DBG2(mal, "disable_irq" NL); +} + +static irqreturn_t mal_serr(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + + u32 esr = get_mal_dcrn(mal, MAL_ESR); + + /* Clear the error status register */ + set_mal_dcrn(mal, MAL_ESR, esr); + + MAL_DBG(mal, "SERR %08x" NL, esr); + + if (esr & MAL_ESR_EVB) { + if (esr & MAL_ESR_DE) { + /* We ignore Descriptor error, + * TXDE or RXDE interrupt will be generated anyway. 
+ */ + return IRQ_HANDLED; + } + + if (esr & MAL_ESR_PEIN) { + /* PLB error, it's probably buggy hardware or + * incorrect physical address in BD (i.e. bug) + */ + if (net_ratelimit()) + printk(KERN_ERR + "mal%d: system error, " + "PLB (ESR = 0x%08x)\n", + mal->index, esr); + return IRQ_HANDLED; + } + + /* OPB error, it's probably buggy hardware or incorrect + * EBC setup + */ + if (net_ratelimit()) + printk(KERN_ERR + "mal%d: system error, OPB (ESR = 0x%08x)\n", + mal->index, esr); + } + return IRQ_HANDLED; +} + +static inline void mal_schedule_poll(struct mal_instance *mal) +{ + if (likely(napi_schedule_prep(&mal->napi))) { + MAL_DBG2(mal, "schedule_poll" NL); + spin_lock(&mal->lock); + mal_disable_eob_irq(mal); + spin_unlock(&mal->lock); + __napi_schedule(&mal->napi); + } else + MAL_DBG2(mal, "already in poll" NL); +} + +static irqreturn_t mal_txeob(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + + u32 r = get_mal_dcrn(mal, MAL_TXEOBISR); + + MAL_DBG2(mal, "txeob %08x" NL, r); + + mal_schedule_poll(mal); + set_mal_dcrn(mal, MAL_TXEOBISR, r); + +#ifdef CONFIG_PPC_DCR_NATIVE + if (mal_has_feature(mal, MAL_FTR_CLEAR_ICINTSTAT)) + mtdcri(SDR0, DCRN_SDR_ICINTSTAT, + (mfdcri(SDR0, DCRN_SDR_ICINTSTAT) | ICINTSTAT_ICTX)); +#endif + + return IRQ_HANDLED; +} + +static irqreturn_t mal_rxeob(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + + u32 r = get_mal_dcrn(mal, MAL_RXEOBISR); + + MAL_DBG2(mal, "rxeob %08x" NL, r); + + mal_schedule_poll(mal); + set_mal_dcrn(mal, MAL_RXEOBISR, r); + +#ifdef CONFIG_PPC_DCR_NATIVE + if (mal_has_feature(mal, MAL_FTR_CLEAR_ICINTSTAT)) + mtdcri(SDR0, DCRN_SDR_ICINTSTAT, + (mfdcri(SDR0, DCRN_SDR_ICINTSTAT) | ICINTSTAT_ICRX)); +#endif + + return IRQ_HANDLED; +} + +static irqreturn_t mal_txde(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + + u32 deir = get_mal_dcrn(mal, MAL_TXDEIR); + set_mal_dcrn(mal, MAL_TXDEIR, deir); + + MAL_DBG(mal, "txde %08x" NL, deir); 
+ + if (net_ratelimit()) + printk(KERN_ERR + "mal%d: TX descriptor error (TXDEIR = 0x%08x)\n", + mal->index, deir); + + return IRQ_HANDLED; +} + +static irqreturn_t mal_rxde(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + struct list_head *l; + + u32 deir = get_mal_dcrn(mal, MAL_RXDEIR); + + MAL_DBG(mal, "rxde %08x" NL, deir); + + list_for_each(l, &mal->list) { + struct mal_commac *mc = list_entry(l, struct mal_commac, list); + if (deir & mc->rx_chan_mask) { + set_bit(MAL_COMMAC_RX_STOPPED, &mc->flags); + mc->ops->rxde(mc->dev); + } + } + + mal_schedule_poll(mal); + set_mal_dcrn(mal, MAL_RXDEIR, deir); + + return IRQ_HANDLED; +} + +static irqreturn_t mal_int(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + u32 esr = get_mal_dcrn(mal, MAL_ESR); + + if (esr & MAL_ESR_EVB) { + /* descriptor error */ + if (esr & MAL_ESR_DE) { + if (esr & MAL_ESR_CIDT) + return mal_rxde(irq, dev_instance); + else + return mal_txde(irq, dev_instance); + } else { /* SERR */ + return mal_serr(irq, dev_instance); + } + } + return IRQ_HANDLED; +} + +void mal_poll_disable(struct mal_instance *mal, struct mal_commac *commac) +{ + /* Spinlock-type semantics: only one caller disable poll at a time */ + while (test_and_set_bit(MAL_COMMAC_POLL_DISABLED, &commac->flags)) + msleep(1); + + /* Synchronize with the MAL NAPI poller */ + napi_synchronize(&mal->napi); +} + +void mal_poll_enable(struct mal_instance *mal, struct mal_commac *commac) +{ + smp_wmb(); + clear_bit(MAL_COMMAC_POLL_DISABLED, &commac->flags); + + /* Feels better to trigger a poll here to catch up with events that + * may have happened on this channel while disabled. It will most + * probably be delayed until the next interrupt but that's mostly a + * non-issue in the context where this is called. 
/*
 * NAPI poll callback shared by all commacs attached to one MAL.
 *
 * Runs TX completion for every commac on the poll list, then RX for every
 * commac whose polling is not disabled, handing each one whatever is left
 * of @budget.  Returns @budget as soon as the RX total reaches it (NAPI
 * stays scheduled); otherwise completes NAPI, re-enables the EOB interrupt
 * and re-checks every commac for packets that arrived in the meantime.
 */
static int mal_poll(struct napi_struct *napi, int budget)
{
	struct mal_instance *mal = container_of(napi, struct mal_instance, napi);
	struct list_head *l;
	int received = 0;
	unsigned long flags;

	MAL_DBG2(mal, "poll(%d)" NL, budget);

	/* Process TX skbs */
	list_for_each(l, &mal->poll_list) {
		struct mal_commac *mc =
			list_entry(l, struct mal_commac, poll_list);
		mc->ops->poll_tx(mc->dev);
	}

	/* Process RX skbs.
	 *
	 * We _might_ need something more smart here to enforce polling
	 * fairness.
	 */
	list_for_each(l, &mal->poll_list) {
		struct mal_commac *mc =
			list_entry(l, struct mal_commac, poll_list);
		int n;
		if (unlikely(test_bit(MAL_COMMAC_POLL_DISABLED, &mc->flags)))
			continue;
		n = mc->ops->poll_rx(mc->dev, budget - received);
		if (n) {
			received += n;
			/* Budget used up: return without completing NAPI */
			if (received >= budget)
				return budget;
		}
	}

	/* Only unmask the EOB interrupt if NAPI really completed */
	if (napi_complete_done(napi, received)) {
		/* We need to disable IRQs to protect from RXDE IRQ here */
		spin_lock_irqsave(&mal->lock, flags);
		mal_enable_eob_irq(mal);
		spin_unlock_irqrestore(&mal->lock, flags);
	}

	/* Check for "rotting" packet(s) */
	list_for_each(l, &mal->poll_list) {
		struct mal_commac *mc =
			list_entry(l, struct mal_commac, poll_list);
		if (unlikely(test_bit(MAL_COMMAC_POLL_DISABLED, &mc->flags)))
			continue;
		if (unlikely(mc->ops->peek_rx(mc->dev) ||
			     test_bit(MAL_COMMAC_RX_STOPPED, &mc->flags))) {
			MAL_DBG2(mal, "rotting packet" NL);
			/* Could not reschedule: stop checking and return */
			if (!napi_reschedule(napi))
				goto more_work;

			/* Rescheduled: mask the EOB interrupt again */
			spin_lock_irqsave(&mal->lock, flags);
			mal_disable_eob_irq(mal);
			spin_unlock_irqrestore(&mal->lock, flags);
		}
		mc->ops->poll_tx(mc->dev);
	}

 more_work:
	MAL_DBG2(mal, "poll() %d <- %d" NL, budget, received);
	return received;
}
MAL_CFG) & MAL_CFG_SR) && n) + --n; + + if (unlikely(!n)) + printk(KERN_ERR "mal%d: reset timeout\n", mal->index); +} + +int mal_get_regs_len(struct mal_instance *mal) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct mal_regs); +} + +void *mal_dump_regs(struct mal_instance *mal, void *buf) +{ + struct emac_ethtool_regs_subhdr *hdr = buf; + struct mal_regs *regs = (struct mal_regs *)(hdr + 1); + int i; + + hdr->version = mal->version; + hdr->index = mal->index; + + regs->tx_count = mal->num_tx_chans; + regs->rx_count = mal->num_rx_chans; + + regs->cfg = get_mal_dcrn(mal, MAL_CFG); + regs->esr = get_mal_dcrn(mal, MAL_ESR); + regs->ier = get_mal_dcrn(mal, MAL_IER); + regs->tx_casr = get_mal_dcrn(mal, MAL_TXCASR); + regs->tx_carr = get_mal_dcrn(mal, MAL_TXCARR); + regs->tx_eobisr = get_mal_dcrn(mal, MAL_TXEOBISR); + regs->tx_deir = get_mal_dcrn(mal, MAL_TXDEIR); + regs->rx_casr = get_mal_dcrn(mal, MAL_RXCASR); + regs->rx_carr = get_mal_dcrn(mal, MAL_RXCARR); + regs->rx_eobisr = get_mal_dcrn(mal, MAL_RXEOBISR); + regs->rx_deir = get_mal_dcrn(mal, MAL_RXDEIR); + + for (i = 0; i < regs->tx_count; ++i) + regs->tx_ctpr[i] = get_mal_dcrn(mal, MAL_TXCTPR(i)); + + for (i = 0; i < regs->rx_count; ++i) { + regs->rx_ctpr[i] = get_mal_dcrn(mal, MAL_RXCTPR(i)); + regs->rcbs[i] = get_mal_dcrn(mal, MAL_RCBS(i)); + } + return regs + 1; +} + +static int mal_probe(struct platform_device *ofdev) +{ + struct mal_instance *mal; + int err = 0, i, bd_size; + int index = mal_count++; + unsigned int dcr_base; + const u32 *prop; + u32 cfg; + unsigned long irqflags; + irq_handler_t hdlr_serr, hdlr_txde, hdlr_rxde; + + mal = kzalloc(sizeof(struct mal_instance), GFP_KERNEL); + if (!mal) + return -ENOMEM; + + mal->index = index; + mal->ofdev = ofdev; + mal->version = of_device_is_compatible(ofdev->dev.of_node, "ibm,mcmal2") ? 
2 : 1; + + MAL_DBG(mal, "probe" NL); + + prop = of_get_property(ofdev->dev.of_node, "num-tx-chans", NULL); + if (prop == NULL) { + printk(KERN_ERR + "mal%d: can't find MAL num-tx-chans property!\n", + index); + err = -ENODEV; + goto fail; + } + mal->num_tx_chans = prop[0]; + + prop = of_get_property(ofdev->dev.of_node, "num-rx-chans", NULL); + if (prop == NULL) { + printk(KERN_ERR + "mal%d: can't find MAL num-rx-chans property!\n", + index); + err = -ENODEV; + goto fail; + } + mal->num_rx_chans = prop[0]; + + dcr_base = dcr_resource_start(ofdev->dev.of_node, 0); + if (dcr_base == 0) { + printk(KERN_ERR + "mal%d: can't find DCR resource!\n", index); + err = -ENODEV; + goto fail; + } + mal->dcr_host = dcr_map(ofdev->dev.of_node, dcr_base, 0x100); + if (!DCR_MAP_OK(mal->dcr_host)) { + printk(KERN_ERR + "mal%d: failed to map DCRs !\n", index); + err = -ENODEV; + goto fail; + } + + if (of_device_is_compatible(ofdev->dev.of_node, "ibm,mcmal-405ez")) { +#if defined(CONFIG_IBM_EMAC_MAL_CLR_ICINTSTAT) && \ + defined(CONFIG_IBM_EMAC_MAL_COMMON_ERR) + mal->features |= (MAL_FTR_CLEAR_ICINTSTAT | + MAL_FTR_COMMON_ERR_INT); +#else + printk(KERN_ERR "%pOF: Support for 405EZ not enabled!\n", + ofdev->dev.of_node); + err = -ENODEV; + goto fail; +#endif + } + + mal->txeob_irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); + mal->rxeob_irq = irq_of_parse_and_map(ofdev->dev.of_node, 1); + mal->serr_irq = irq_of_parse_and_map(ofdev->dev.of_node, 2); + + if (mal_has_feature(mal, MAL_FTR_COMMON_ERR_INT)) { + mal->txde_irq = mal->rxde_irq = mal->serr_irq; + } else { + mal->txde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 3); + mal->rxde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 4); + } + + if (!mal->txeob_irq || !mal->rxeob_irq || !mal->serr_irq || + !mal->txde_irq || !mal->rxde_irq) { + printk(KERN_ERR + "mal%d: failed to map interrupts !\n", index); + err = -ENODEV; + goto fail_unmap; + } + + INIT_LIST_HEAD(&mal->poll_list); + INIT_LIST_HEAD(&mal->list); + 
spin_lock_init(&mal->lock); + + init_dummy_netdev(&mal->dummy_dev); + + netif_napi_add_weight(&mal->dummy_dev, &mal->napi, mal_poll, + CONFIG_IBM_EMAC_POLL_WEIGHT); + + /* Load power-on reset defaults */ + mal_reset(mal); + + /* Set the MAL configuration register */ + cfg = (mal->version == 2) ? MAL2_CFG_DEFAULT : MAL1_CFG_DEFAULT; + cfg |= MAL_CFG_PLBB | MAL_CFG_OPBBL | MAL_CFG_LEA; + + /* Current Axon is not happy with priority being non-0, it can + * deadlock, fix it up here + */ + if (of_device_is_compatible(ofdev->dev.of_node, "ibm,mcmal-axon")) + cfg &= ~(MAL2_CFG_RPP_10 | MAL2_CFG_WPP_10); + + /* Apply configuration */ + set_mal_dcrn(mal, MAL_CFG, cfg); + + /* Allocate space for BD rings */ + BUG_ON(mal->num_tx_chans <= 0 || mal->num_tx_chans > 32); + BUG_ON(mal->num_rx_chans <= 0 || mal->num_rx_chans > 32); + + bd_size = sizeof(struct mal_descriptor) * + (NUM_TX_BUFF * mal->num_tx_chans + + NUM_RX_BUFF * mal->num_rx_chans); + mal->bd_virt = dma_alloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma, + GFP_KERNEL); + if (mal->bd_virt == NULL) { + err = -ENOMEM; + goto fail_unmap; + } + + for (i = 0; i < mal->num_tx_chans; ++i) + set_mal_dcrn(mal, MAL_TXCTPR(i), mal->bd_dma + + sizeof(struct mal_descriptor) * + mal_tx_bd_offset(mal, i)); + + for (i = 0; i < mal->num_rx_chans; ++i) + set_mal_dcrn(mal, MAL_RXCTPR(i), mal->bd_dma + + sizeof(struct mal_descriptor) * + mal_rx_bd_offset(mal, i)); + + if (mal_has_feature(mal, MAL_FTR_COMMON_ERR_INT)) { + irqflags = IRQF_SHARED; + hdlr_serr = hdlr_txde = hdlr_rxde = mal_int; + } else { + irqflags = 0; + hdlr_serr = mal_serr; + hdlr_txde = mal_txde; + hdlr_rxde = mal_rxde; + } + + err = request_irq(mal->serr_irq, hdlr_serr, irqflags, "MAL SERR", mal); + if (err) + goto fail2; + err = request_irq(mal->txde_irq, hdlr_txde, irqflags, "MAL TX DE", mal); + if (err) + goto fail3; + err = request_irq(mal->txeob_irq, mal_txeob, 0, "MAL TX EOB", mal); + if (err) + goto fail4; + err = request_irq(mal->rxde_irq, hdlr_rxde, irqflags, 
"MAL RX DE", mal); + if (err) + goto fail5; + err = request_irq(mal->rxeob_irq, mal_rxeob, 0, "MAL RX EOB", mal); + if (err) + goto fail6; + + /* Enable all MAL SERR interrupt sources */ + set_mal_dcrn(mal, MAL_IER, MAL_IER_EVENTS); + + /* Enable EOB interrupt */ + mal_enable_eob_irq(mal); + + printk(KERN_INFO + "MAL v%d %pOF, %d TX channels, %d RX channels\n", + mal->version, ofdev->dev.of_node, + mal->num_tx_chans, mal->num_rx_chans); + + /* Advertise this instance to the rest of the world */ + wmb(); + platform_set_drvdata(ofdev, mal); + + return 0; + + fail6: + free_irq(mal->rxde_irq, mal); + fail5: + free_irq(mal->txeob_irq, mal); + fail4: + free_irq(mal->txde_irq, mal); + fail3: + free_irq(mal->serr_irq, mal); + fail2: + dma_free_coherent(&ofdev->dev, bd_size, mal->bd_virt, mal->bd_dma); + fail_unmap: + dcr_unmap(mal->dcr_host, 0x100); + fail: + kfree(mal); + + return err; +} + +static int mal_remove(struct platform_device *ofdev) +{ + struct mal_instance *mal = platform_get_drvdata(ofdev); + + MAL_DBG(mal, "remove" NL); + + /* Synchronize with scheduled polling */ + napi_disable(&mal->napi); + + if (!list_empty(&mal->list)) + /* This is *very* bad */ + WARN(1, KERN_EMERG + "mal%d: commac list is not empty on remove!\n", + mal->index); + + free_irq(mal->serr_irq, mal); + free_irq(mal->txde_irq, mal); + free_irq(mal->txeob_irq, mal); + free_irq(mal->rxde_irq, mal); + free_irq(mal->rxeob_irq, mal); + + mal_reset(mal); + + dma_free_coherent(&ofdev->dev, + sizeof(struct mal_descriptor) * + (NUM_TX_BUFF * mal->num_tx_chans + + NUM_RX_BUFF * mal->num_rx_chans), mal->bd_virt, + mal->bd_dma); + kfree(mal); + + return 0; +} + +static const struct of_device_id mal_platform_match[] = +{ + { + .compatible = "ibm,mcmal", + }, + { + .compatible = "ibm,mcmal2", + }, + /* Backward compat */ + { + .type = "mcmal-dma", + .compatible = "ibm,mcmal", + }, + { + .type = "mcmal-dma", + .compatible = "ibm,mcmal2", + }, + {}, +}; + +static struct platform_driver mal_of_driver = { + 
/*
 * Register the MAL platform driver.
 * Returns the result of platform_driver_register().
 */
int __init mal_init(void)
{
	return platform_driver_register(&mal_of_driver);
}

/* Unregister the MAL platform driver. */
void mal_exit(void)
{
	platform_driver_unregister(&mal_of_driver);
}
+ */ + +/* MALx DCR registers */ +#define MAL_CFG 0x00 +#define MAL_CFG_SR 0x80000000 +#define MAL_CFG_PLBB 0x00004000 +#define MAL_CFG_OPBBL 0x00000080 +#define MAL_CFG_EOPIE 0x00000004 +#define MAL_CFG_LEA 0x00000002 +#define MAL_CFG_SD 0x00000001 + +/* MAL V1 CFG bits */ +#define MAL1_CFG_PLBP_MASK 0x00c00000 +#define MAL1_CFG_PLBP_10 0x00800000 +#define MAL1_CFG_GA 0x00200000 +#define MAL1_CFG_OA 0x00100000 +#define MAL1_CFG_PLBLE 0x00080000 +#define MAL1_CFG_PLBT_MASK 0x00078000 +#define MAL1_CFG_DEFAULT (MAL1_CFG_PLBP_10 | MAL1_CFG_PLBT_MASK) + +/* MAL V2 CFG bits */ +#define MAL2_CFG_RPP_MASK 0x00c00000 +#define MAL2_CFG_RPP_10 0x00800000 +#define MAL2_CFG_RMBS_MASK 0x00300000 +#define MAL2_CFG_WPP_MASK 0x000c0000 +#define MAL2_CFG_WPP_10 0x00080000 +#define MAL2_CFG_WMBS_MASK 0x00030000 +#define MAL2_CFG_PLBLE 0x00008000 +#define MAL2_CFG_DEFAULT (MAL2_CFG_RMBS_MASK | MAL2_CFG_WMBS_MASK | \ + MAL2_CFG_RPP_10 | MAL2_CFG_WPP_10) + +#define MAL_ESR 0x01 +#define MAL_ESR_EVB 0x80000000 +#define MAL_ESR_CIDT 0x40000000 +#define MAL_ESR_CID_MASK 0x3e000000 +#define MAL_ESR_CID_SHIFT 25 +#define MAL_ESR_DE 0x00100000 +#define MAL_ESR_OTE 0x00040000 +#define MAL_ESR_OSE 0x00020000 +#define MAL_ESR_PEIN 0x00010000 +#define MAL_ESR_DEI 0x00000010 +#define MAL_ESR_OTEI 0x00000004 +#define MAL_ESR_OSEI 0x00000002 +#define MAL_ESR_PBEI 0x00000001 + +/* MAL V1 ESR bits */ +#define MAL1_ESR_ONE 0x00080000 +#define MAL1_ESR_ONEI 0x00000008 + +/* MAL V2 ESR bits */ +#define MAL2_ESR_PTE 0x00800000 +#define MAL2_ESR_PRE 0x00400000 +#define MAL2_ESR_PWE 0x00200000 +#define MAL2_ESR_PTEI 0x00000080 +#define MAL2_ESR_PREI 0x00000040 +#define MAL2_ESR_PWEI 0x00000020 + + +#define MAL_IER 0x02 +/* MAL IER bits */ +#define MAL_IER_DE 0x00000010 +#define MAL_IER_OTE 0x00000004 +#define MAL_IER_OE 0x00000002 +#define MAL_IER_PE 0x00000001 + +/* PLB read/write/timeout errors */ +#define MAL_IER_PTE 0x00000080 +#define MAL_IER_PRE 0x00000040 +#define MAL_IER_PWE 0x00000020 + +#define 
/* In reality MAL can handle TX buffers up to 4095 bytes long,
 * but this isn't a good round number :) --ebs
 */
#define MAL_MAX_TX_SIZE		4080
#define MAL_MAX_RX_SIZE		4080

/*
 * Round @len up to the next multiple of 16 and clamp the result to
 * MAL_MAX_RX_SIZE.
 */
static inline int mal_rx_size(int len)
{
	int rounded = (len + 0xf) & ~0xf;

	if (rounded > MAL_MAX_RX_SIZE)
		return MAL_MAX_RX_SIZE;

	return rounded;
}
/*
 * Callbacks a commac supplies to the MAL.  All of them are invoked with
 * the commac's 'dev' cookie.
 */
struct mal_commac_ops {
	/* TX completion processing; called from the MAL NAPI poller */
	void	(*poll_tx) (void *dev);
	/* RX processing with the remaining budget; returns the amount used */
	int	(*poll_rx) (void *dev, int budget);
	/* Non-zero when RX work is still pending after a poll */
	int	(*peek_rx) (void *dev);
	/* RX descriptor error notification; called from interrupt context */
	void	(*rxde) (void *dev);
};

/* One MAL client, owning a set of TX and RX channels */
struct mal_commac {
	struct mal_commac_ops	*ops;
	void			*dev;		/* cookie passed to ops */
	struct list_head	poll_list;	/* link in mal_instance.poll_list */
	long       		flags;		/* bit numbers defined below */
#define MAL_COMMAC_RX_STOPPED		0
#define MAL_COMMAC_POLL_DISABLED	1
	u32			tx_chan_mask;	/* TX channels claimed */
	u32			rx_chan_mask;	/* RX channels claimed */
	struct list_head	list;		/* link in mal_instance.list */
};

/* Per-device state of one MAL */
struct mal_instance {
	int			version;	/* 1 or 2, see top of file */
	dcr_host_t		dcr_host;	/* mapped DCR window */

	int			num_tx_chans;	/* Number of TX channels */
	int			num_rx_chans;	/* Number of RX channels */
	int 			txeob_irq;	/* TX End Of Buffer IRQ  */
	int 			rxeob_irq;	/* RX End Of Buffer IRQ  */
	int			txde_irq;	/* TX Descriptor Error IRQ */
	int			rxde_irq;	/* RX Descriptor Error IRQ */
	int			serr_irq;	/* MAL System Error IRQ    */

	struct list_head	poll_list;	/* commacs to poll */
	struct napi_struct	napi;

	struct list_head	list;		/* registered commacs */
	u32			tx_chan_mask;	/* union of registered TX masks */
	u32			rx_chan_mask;	/* union of registered RX masks */

	dma_addr_t		bd_dma;		/* bus address of the BD area */
	struct mal_descriptor	*bd_virt;	/* CPU address of the BD area */

	struct platform_device	*ofdev;
	int			index;		/* instance number used in logs */
	spinlock_t		lock;

	struct net_device	dummy_dev;	/* netdev that hosts the napi context */

	unsigned int features;			/* MAL_FTR_* bits */
};

/* Read MAL DCR register @reg */
static inline u32 get_mal_dcrn(struct mal_instance *mal, int reg)
{
	return dcr_read(mal->dcr_host, reg);
}

/* Write @val to MAL DCR register @reg */
static inline void set_mal_dcrn(struct mal_instance *mal, int reg, u32 val)
{
	dcr_write(mal->dcr_host, reg, val);
}
interrupt coalescing and you have to clear the SDR + * register for TXEOB and RXEOB interrupts to work + */ +#define MAL_FTR_CLEAR_ICINTSTAT 0x00000001 + +/* Set if your MAL has SERR, TXDE, and RXDE OR'd into a single UIC + * interrupt + */ +#define MAL_FTR_COMMON_ERR_INT 0x00000002 + +enum { + MAL_FTRS_ALWAYS = 0, + + MAL_FTRS_POSSIBLE = +#ifdef CONFIG_IBM_EMAC_MAL_CLR_ICINTSTAT + MAL_FTR_CLEAR_ICINTSTAT | +#endif +#ifdef CONFIG_IBM_EMAC_MAL_COMMON_ERR + MAL_FTR_COMMON_ERR_INT | +#endif + 0, +}; + +static inline int mal_has_feature(struct mal_instance *dev, + unsigned long feature) +{ + return (MAL_FTRS_ALWAYS & feature) || + (MAL_FTRS_POSSIBLE & dev->features & feature); +} + +/* Register MAL devices */ +int mal_init(void); +void mal_exit(void); + +int mal_register_commac(struct mal_instance *mal, + struct mal_commac *commac); +void mal_unregister_commac(struct mal_instance *mal, + struct mal_commac *commac); +int mal_set_rcbs(struct mal_instance *mal, int channel, unsigned long size); + +/* Returns BD ring offset for a particular channel + (in 'struct mal_descriptor' elements) +*/ +int mal_tx_bd_offset(struct mal_instance *mal, int channel); +int mal_rx_bd_offset(struct mal_instance *mal, int channel); + +void mal_enable_tx_channel(struct mal_instance *mal, int channel); +void mal_disable_tx_channel(struct mal_instance *mal, int channel); +void mal_enable_rx_channel(struct mal_instance *mal, int channel); +void mal_disable_rx_channel(struct mal_instance *mal, int channel); + +void mal_poll_disable(struct mal_instance *mal, struct mal_commac *commac); +void mal_poll_enable(struct mal_instance *mal, struct mal_commac *commac); + +/* Add/remove EMAC to/from MAL polling list */ +void mal_poll_add(struct mal_instance *mal, struct mal_commac *commac); +void mal_poll_del(struct mal_instance *mal, struct mal_commac *commac); + +/* Ethtool MAL registers */ +struct mal_regs { + u32 tx_count; + u32 rx_count; + + u32 cfg; + u32 esr; + u32 ier; + u32 tx_casr; + u32 tx_carr; 
+ u32 tx_eobisr; + u32 tx_deir; + u32 rx_casr; + u32 rx_carr; + u32 rx_eobisr; + u32 rx_deir; + u32 tx_ctpr[32]; + u32 rx_ctpr[32]; + u32 rcbs[32]; +}; + +int mal_get_regs_len(struct mal_instance *mal); +void *mal_dump_regs(struct mal_instance *mal, void *buf); + +#endif /* __IBM_NEWEMAC_MAL_H */ diff --git a/drivers/net/ethernet/ibm/emac/phy.c b/drivers/net/ethernet/ibm/emac/phy.c new file mode 100644 index 000000000..1e798cc9b --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/phy.c @@ -0,0 +1,568 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * drivers/net/ethernet/ibm/emac/phy.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, PHY support. + * Borrowed from sungem_phy.c, though I only kept the generic MII + * driver for now. + * + * This file should be shared with other drivers or eventually + * merged as the "low level" part of miilib + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * (c) 2003, Benjamin Herrenscmidt (benh@kernel.crashing.org) + * (c) 2004-2005, Eugene Surovegin <ebs@ebshome.net> + * + */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/mii.h> +#include <linux/ethtool.h> +#include <linux/delay.h> + +#include "emac.h" +#include "phy.h" + +#define phy_read _phy_read +#define phy_write _phy_write + +static inline int _phy_read(struct mii_phy *phy, int reg) +{ + return phy->mdio_read(phy->dev, phy->address, reg); +} + +static inline void _phy_write(struct mii_phy *phy, int reg, int val) +{ + phy->mdio_write(phy->dev, phy->address, reg, val); +} + +static inline int gpcs_phy_read(struct mii_phy *phy, int reg) +{ + return phy->mdio_read(phy->dev, phy->gpcs_address, reg); +} + +static inline void gpcs_phy_write(struct mii_phy *phy, int reg, int val) +{ + phy->mdio_write(phy->dev, phy->gpcs_address, reg, val); +} + +int emac_mii_reset_phy(struct mii_phy *phy) +{ + 
int val; + int limit = 10000; + + val = phy_read(phy, MII_BMCR); + val &= ~(BMCR_ISOLATE | BMCR_ANENABLE); + val |= BMCR_RESET; + phy_write(phy, MII_BMCR, val); + + udelay(300); + + while (--limit) { + val = phy_read(phy, MII_BMCR); + if (val >= 0 && (val & BMCR_RESET) == 0) + break; + udelay(10); + } + if ((val & BMCR_ISOLATE) && limit > 0) + phy_write(phy, MII_BMCR, val & ~BMCR_ISOLATE); + + return limit <= 0; +} + +int emac_mii_reset_gpcs(struct mii_phy *phy) +{ + int val; + int limit = 10000; + + val = gpcs_phy_read(phy, MII_BMCR); + val &= ~(BMCR_ISOLATE | BMCR_ANENABLE); + val |= BMCR_RESET; + gpcs_phy_write(phy, MII_BMCR, val); + + udelay(300); + + while (--limit) { + val = gpcs_phy_read(phy, MII_BMCR); + if (val >= 0 && (val & BMCR_RESET) == 0) + break; + udelay(10); + } + if ((val & BMCR_ISOLATE) && limit > 0) + gpcs_phy_write(phy, MII_BMCR, val & ~BMCR_ISOLATE); + + if (limit > 0 && phy->mode == PHY_INTERFACE_MODE_SGMII) { + /* Configure GPCS interface to recommended setting for SGMII */ + gpcs_phy_write(phy, 0x04, 0x8120); /* AsymPause, FDX */ + gpcs_phy_write(phy, 0x07, 0x2801); /* msg_pg, toggle */ + gpcs_phy_write(phy, 0x00, 0x0140); /* 1Gbps, FDX */ + } + + return limit <= 0; +} + +static int genmii_setup_aneg(struct mii_phy *phy, u32 advertise) +{ + int ctl, adv; + + phy->autoneg = AUTONEG_ENABLE; + phy->speed = SPEED_10; + phy->duplex = DUPLEX_HALF; + phy->pause = phy->asym_pause = 0; + phy->advertising = advertise; + + ctl = phy_read(phy, MII_BMCR); + if (ctl < 0) + return ctl; + ctl &= ~(BMCR_FULLDPLX | BMCR_SPEED100 | BMCR_SPEED1000 | BMCR_ANENABLE); + + /* First clear the PHY */ + phy_write(phy, MII_BMCR, ctl); + + /* Setup standard advertise */ + adv = phy_read(phy, MII_ADVERTISE); + if (adv < 0) + return adv; + adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | + ADVERTISE_PAUSE_ASYM); + if (advertise & ADVERTISED_10baseT_Half) + adv |= ADVERTISE_10HALF; + if (advertise & ADVERTISED_10baseT_Full) + adv |= ADVERTISE_10FULL; + 
if (advertise & ADVERTISED_100baseT_Half) + adv |= ADVERTISE_100HALF; + if (advertise & ADVERTISED_100baseT_Full) + adv |= ADVERTISE_100FULL; + if (advertise & ADVERTISED_Pause) + adv |= ADVERTISE_PAUSE_CAP; + if (advertise & ADVERTISED_Asym_Pause) + adv |= ADVERTISE_PAUSE_ASYM; + phy_write(phy, MII_ADVERTISE, adv); + + if (phy->features & + (SUPPORTED_1000baseT_Full | SUPPORTED_1000baseT_Half)) { + adv = phy_read(phy, MII_CTRL1000); + if (adv < 0) + return adv; + adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF); + if (advertise & ADVERTISED_1000baseT_Full) + adv |= ADVERTISE_1000FULL; + if (advertise & ADVERTISED_1000baseT_Half) + adv |= ADVERTISE_1000HALF; + phy_write(phy, MII_CTRL1000, adv); + } + + /* Start/Restart aneg */ + ctl = phy_read(phy, MII_BMCR); + ctl |= (BMCR_ANENABLE | BMCR_ANRESTART); + phy_write(phy, MII_BMCR, ctl); + + return 0; +} + +static int genmii_setup_forced(struct mii_phy *phy, int speed, int fd) +{ + int ctl; + + phy->autoneg = AUTONEG_DISABLE; + phy->speed = speed; + phy->duplex = fd; + phy->pause = phy->asym_pause = 0; + + ctl = phy_read(phy, MII_BMCR); + if (ctl < 0) + return ctl; + ctl &= ~(BMCR_FULLDPLX | BMCR_SPEED100 | BMCR_SPEED1000 | BMCR_ANENABLE); + + /* First clear the PHY */ + phy_write(phy, MII_BMCR, ctl | BMCR_RESET); + + /* Select speed & duplex */ + switch (speed) { + case SPEED_10: + break; + case SPEED_100: + ctl |= BMCR_SPEED100; + break; + case SPEED_1000: + ctl |= BMCR_SPEED1000; + break; + default: + return -EINVAL; + } + if (fd == DUPLEX_FULL) + ctl |= BMCR_FULLDPLX; + phy_write(phy, MII_BMCR, ctl); + + return 0; +} + +static int genmii_poll_link(struct mii_phy *phy) +{ + int status; + + /* Clear latched value with dummy read */ + phy_read(phy, MII_BMSR); + status = phy_read(phy, MII_BMSR); + if (status < 0 || (status & BMSR_LSTATUS) == 0) + return 0; + if (phy->autoneg == AUTONEG_ENABLE && !(status & BMSR_ANEGCOMPLETE)) + return 0; + return 1; +} + +/* + * Refresh phy->speed, duplex, pause and asym_pause from the PHY registers: + * from the negotiated abilities when autoneg is enabled, otherwise from the + * forced settings in BMCR. Returns 0, or the negative phy_read() result + * if a register read fails. + */ +static int genmii_read_link(struct mii_phy *phy) +{ + if
(phy->autoneg == AUTONEG_ENABLE) { + int glpa = 0; + int lpa = phy_read(phy, MII_LPA); + int own = phy_read(phy, MII_ADVERTISE); + + /* + * Check each read before combining them: AND-ing a negative + * error with a valid register value can give a non-negative + * result and hide the failure. + */ + if (lpa < 0) + return lpa; + if (own < 0) + return own; + lpa &= own; + + if (phy->features & + (SUPPORTED_1000baseT_Full | SUPPORTED_1000baseT_Half)) { + int adv = phy_read(phy, MII_CTRL1000); + glpa = phy_read(phy, MII_STAT1000); + + /* Report whichever read actually failed */ + if (adv < 0) + return adv; + if (glpa < 0) + return glpa; + + glpa &= adv << 2; + } + + phy->speed = SPEED_10; + phy->duplex = DUPLEX_HALF; + phy->pause = phy->asym_pause = 0; + + if (glpa & (LPA_1000FULL | LPA_1000HALF)) { + phy->speed = SPEED_1000; + if (glpa & LPA_1000FULL) + phy->duplex = DUPLEX_FULL; + } else if (lpa & (LPA_100FULL | LPA_100HALF)) { + phy->speed = SPEED_100; + if (lpa & LPA_100FULL) + phy->duplex = DUPLEX_FULL; + } else if (lpa & LPA_10FULL) + phy->duplex = DUPLEX_FULL; + + if (phy->duplex == DUPLEX_FULL) { + phy->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; + phy->asym_pause = lpa & LPA_PAUSE_ASYM ? 1 : 0; + } + } else { + int bmcr = phy_read(phy, MII_BMCR); + if (bmcr < 0) + return bmcr; + + if (bmcr & BMCR_FULLDPLX) + phy->duplex = DUPLEX_FULL; + else + phy->duplex = DUPLEX_HALF; + if (bmcr & BMCR_SPEED1000) + phy->speed = SPEED_1000; + else if (bmcr & BMCR_SPEED100) + phy->speed = SPEED_100; + else + phy->speed = SPEED_10; + + phy->pause = phy->asym_pause = 0; + } + return 0; +} + +/* Generic implementation for most 10/100/1000 PHYs */ +static const struct mii_phy_ops generic_phy_ops = { + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link +}; + +static struct mii_phy_def genmii_phy_def = { + .phy_id = 0x00000000, + .phy_id_mask = 0x00000000, + .name = "Generic MII", + .ops = &generic_phy_ops +}; + +/* CIS8201 */ +#define MII_CIS8201_10BTCSR 0x16 +#define TENBTCSR_ECHO_DISABLE 0x2000 +#define MII_CIS8201_EPCR 0x17 +#define EPCR_MODE_MASK 0x3000 +#define EPCR_GMII_MODE 0x0000 +#define EPCR_RGMII_MODE 0x1000 +#define EPCR_TBI_MODE 0x2000 +#define EPCR_RTBI_MODE 0x3000 +#define
MII_CIS8201_ACSR 0x1c +#define ACSR_PIN_PRIO_SELECT 0x0004 + +static int cis8201_init(struct mii_phy *phy) +{ + int epcr; + + epcr = phy_read(phy, MII_CIS8201_EPCR); + if (epcr < 0) + return epcr; + + epcr &= ~EPCR_MODE_MASK; + + switch (phy->mode) { + case PHY_INTERFACE_MODE_TBI: + epcr |= EPCR_TBI_MODE; + break; + case PHY_INTERFACE_MODE_RTBI: + epcr |= EPCR_RTBI_MODE; + break; + case PHY_INTERFACE_MODE_GMII: + epcr |= EPCR_GMII_MODE; + break; + case PHY_INTERFACE_MODE_RGMII: + default: + epcr |= EPCR_RGMII_MODE; + } + + phy_write(phy, MII_CIS8201_EPCR, epcr); + + /* MII regs override strap pins */ + phy_write(phy, MII_CIS8201_ACSR, + phy_read(phy, MII_CIS8201_ACSR) | ACSR_PIN_PRIO_SELECT); + + /* Disable TX_EN -> CRS echo mode, otherwise 10/HDX doesn't work */ + phy_write(phy, MII_CIS8201_10BTCSR, + phy_read(phy, MII_CIS8201_10BTCSR) | TENBTCSR_ECHO_DISABLE); + + return 0; +} + +static const struct mii_phy_ops cis8201_phy_ops = { + .init = cis8201_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link +}; + +static struct mii_phy_def cis8201_phy_def = { + .phy_id = 0x000fc410, + .phy_id_mask = 0x000ffff0, + .name = "CIS8201 Gigabit Ethernet", + .ops = &cis8201_phy_ops +}; + +static struct mii_phy_def bcm5248_phy_def = { + + .phy_id = 0x0143bc00, + .phy_id_mask = 0x0ffffff0, + .name = "BCM5248 10/100 SMII Ethernet", + .ops = &generic_phy_ops +}; + +static int m88e1111_init(struct mii_phy *phy) +{ + pr_debug("%s: Marvell 88E1111 Ethernet\n", __func__); + phy_write(phy, 0x14, 0x0ce3); + phy_write(phy, 0x18, 0x4101); + phy_write(phy, 0x09, 0x0e00); + phy_write(phy, 0x04, 0x01e1); + phy_write(phy, 0x00, 0x9140); + phy_write(phy, 0x00, 0x1140); + + return 0; +} + +static int m88e1112_init(struct mii_phy *phy) +{ + /* + * Marvell 88E1112 PHY needs to have the SGMII MAC + * interace (page 2) properly configured to + * communicate with the 460EX/GT GPCS interface. 
+ */ + + u16 reg_short; + + pr_debug("%s: Marvell 88E1112 Ethernet\n", __func__); + + /* Set access to Page 2 */ + phy_write(phy, 0x16, 0x0002); + + phy_write(phy, 0x00, 0x0040); /* 1Gbps */ + reg_short = (u16)(phy_read(phy, 0x1a)); + reg_short |= 0x8000; /* bypass Auto-Negotiation */ + phy_write(phy, 0x1a, reg_short); + emac_mii_reset_phy(phy); /* reset MAC interface */ + + /* Reset access to Page 0 */ + phy_write(phy, 0x16, 0x0000); + + return 0; +} + +static int et1011c_init(struct mii_phy *phy) +{ + u16 reg_short; + + reg_short = (u16)(phy_read(phy, 0x16)); + reg_short &= ~(0x7); + reg_short |= 0x6; /* RGMII Trace Delay*/ + phy_write(phy, 0x16, reg_short); + + reg_short = (u16)(phy_read(phy, 0x17)); + reg_short &= ~(0x40); + phy_write(phy, 0x17, reg_short); + + phy_write(phy, 0x1c, 0x74f0); + return 0; +} + +static const struct mii_phy_ops et1011c_phy_ops = { + .init = et1011c_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link +}; + +static struct mii_phy_def et1011c_phy_def = { + .phy_id = 0x0282f000, + .phy_id_mask = 0x0fffff00, + .name = "ET1011C Gigabit Ethernet", + .ops = &et1011c_phy_ops +}; + + + + + +static const struct mii_phy_ops m88e1111_phy_ops = { + .init = m88e1111_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link +}; + +static struct mii_phy_def m88e1111_phy_def = { + + .phy_id = 0x01410CC0, + .phy_id_mask = 0x0ffffff0, + .name = "Marvell 88E1111 Ethernet", + .ops = &m88e1111_phy_ops, +}; + +static const struct mii_phy_ops m88e1112_phy_ops = { + .init = m88e1112_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link +}; + +static struct mii_phy_def m88e1112_phy_def = { + .phy_id = 0x01410C90, + .phy_id_mask = 0x0ffffff0, + .name = "Marvell 88E1112 Ethernet", + .ops = 
&m88e1112_phy_ops, +}; + +static int ar8035_init(struct mii_phy *phy) +{ + phy_write(phy, 0x1d, 0x5); /* Address debug register 5 */ + phy_write(phy, 0x1e, 0x2d47); /* Value copied from u-boot */ + phy_write(phy, 0x1d, 0xb); /* Address hib ctrl */ + phy_write(phy, 0x1e, 0xbc20); /* Value copied from u-boot */ + + return 0; +} + +static const struct mii_phy_ops ar8035_phy_ops = { + .init = ar8035_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link, +}; + +static struct mii_phy_def ar8035_phy_def = { + .phy_id = 0x004dd070, + .phy_id_mask = 0xfffffff0, + .name = "Atheros 8035 Gigabit Ethernet", + .ops = &ar8035_phy_ops, +}; + +static struct mii_phy_def *mii_phy_table[] = { + &et1011c_phy_def, + &cis8201_phy_def, + &bcm5248_phy_def, + &m88e1111_phy_def, + &m88e1112_phy_def, + &ar8035_phy_def, + &genmii_phy_def, + NULL +}; + +int emac_mii_phy_probe(struct mii_phy *phy, int address) +{ + struct mii_phy_def *def; + int i; + u32 id; + + phy->autoneg = AUTONEG_DISABLE; + phy->advertising = 0; + phy->address = address; + phy->speed = SPEED_10; + phy->duplex = DUPLEX_HALF; + phy->pause = phy->asym_pause = 0; + + /* Take PHY out of isolate mode and reset it. */ + if (emac_mii_reset_phy(phy)) + return -ENODEV; + + /* Read ID and find matching entry */ + id = (phy_read(phy, MII_PHYSID1) << 16) | phy_read(phy, MII_PHYSID2); + for (i = 0; (def = mii_phy_table[i]) != NULL; i++) + if ((id & def->phy_id_mask) == def->phy_id) + break; + /* Should never be NULL (we have a generic entry), but... 
*/ + if (!def) + return -ENODEV; + + phy->def = def; + + /* Determine PHY features if needed */ + phy->features = def->features; + if (!phy->features) { + u16 bmsr = phy_read(phy, MII_BMSR); + if (bmsr & BMSR_ANEGCAPABLE) + phy->features |= SUPPORTED_Autoneg; + if (bmsr & BMSR_10HALF) + phy->features |= SUPPORTED_10baseT_Half; + if (bmsr & BMSR_10FULL) + phy->features |= SUPPORTED_10baseT_Full; + if (bmsr & BMSR_100HALF) + phy->features |= SUPPORTED_100baseT_Half; + if (bmsr & BMSR_100FULL) + phy->features |= SUPPORTED_100baseT_Full; + if (bmsr & BMSR_ESTATEN) { + u16 esr = phy_read(phy, MII_ESTATUS); + if (esr & ESTATUS_1000_TFULL) + phy->features |= SUPPORTED_1000baseT_Full; + if (esr & ESTATUS_1000_THALF) + phy->features |= SUPPORTED_1000baseT_Half; + } + phy->features |= SUPPORTED_MII; + } + + /* Setup default advertising */ + phy->advertising = phy->features; + + return 0; +} + +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/ibm/emac/phy.h b/drivers/net/ethernet/ibm/emac/phy.h new file mode 100644 index 000000000..2184e8373 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/phy.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * drivers/net/ethernet/ibm/emac/phy.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, PHY support + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Benjamin Herrenschmidt <benh@kernel.crashing.org> + * February 2003 + * + * Minor additions by Eugene Surovegin <ebs@ebshome.net>, 2004 + * + * This file basically duplicates sungem_phy.{c,h} with different PHYs + * supported. 
I'm looking into merging that in a single mii layer more + * flexible than mii.c + */ + +#ifndef __IBM_NEWEMAC_PHY_H +#define __IBM_NEWEMAC_PHY_H + +struct mii_phy; + +/* Operations supported by any kind of PHY */ +struct mii_phy_ops { + int (*init) (struct mii_phy * phy); + int (*suspend) (struct mii_phy * phy, int wol_options); + int (*setup_aneg) (struct mii_phy * phy, u32 advertise); + int (*setup_forced) (struct mii_phy * phy, int speed, int fd); + int (*poll_link) (struct mii_phy * phy); + int (*read_link) (struct mii_phy * phy); +}; + +/* Structure used to statically define an mii/gii based PHY */ +struct mii_phy_def { + u32 phy_id; /* Concatenated ID1 << 16 | ID2 */ + u32 phy_id_mask; /* Significant bits */ + u32 features; /* Ethtool SUPPORTED_* defines or + 0 for autodetect */ + int magic_aneg; /* Autoneg does all speed test for us */ + const char *name; + const struct mii_phy_ops *ops; +}; + +/* An instance of a PHY, partially borrowed from mii_if_info */ +struct mii_phy { + struct mii_phy_def *def; + u32 advertising; /* Ethtool ADVERTISED_* defines */ + u32 features; /* Copied from mii_phy_def.features + or determined automaticaly */ + int address; /* PHY address */ + int mode; /* PHY mode */ + int gpcs_address; /* GPCS PHY address */ + + /* 1: autoneg enabled, 0: disabled */ + int autoneg; + + /* forced speed & duplex (no autoneg) + * partner speed & duplex & pause (autoneg) + */ + int speed; + int duplex; + int pause; + int asym_pause; + + /* Provided by host chip */ + struct net_device *dev; + int (*mdio_read) (struct net_device * dev, int addr, int reg); + void (*mdio_write) (struct net_device * dev, int addr, int reg, + int val); +}; + +/* Pass in a struct mii_phy with dev, mdio_read and mdio_write + * filled, the remaining fields will be filled on return + */ +int emac_mii_phy_probe(struct mii_phy *phy, int address); +int emac_mii_reset_phy(struct mii_phy *phy); +int emac_mii_reset_gpcs(struct mii_phy *phy); + +#endif /* __IBM_NEWEMAC_PHY_H */ diff 
--git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c new file mode 100644 index 000000000..242ef976f --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/rgmii.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * drivers/net/ethernet/ibm/emac/rgmii.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, RGMII bridge support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Matt Porter <mporter@kernel.crashing.org> + * Copyright 2004 MontaVista Software, Inc. + */ +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/ethtool.h> +#include <linux/of_address.h> +#include <asm/io.h> + +#include "emac.h" +#include "debug.h" + +// XXX FIXME: Axon seems to support a subset of the RGMII, we +// thus need to take that into account and possibly change some +// of the bit settings below that don't seem to quite match the +// AXON spec + +/* RGMIIx_FER */ +#define RGMII_FER_MASK(idx) (0x7 << ((idx) * 4)) +#define RGMII_FER_RTBI(idx) (0x4 << ((idx) * 4)) +#define RGMII_FER_RGMII(idx) (0x5 << ((idx) * 4)) +#define RGMII_FER_TBI(idx) (0x6 << ((idx) * 4)) +#define RGMII_FER_GMII(idx) (0x7 << ((idx) * 4)) +#define RGMII_FER_MII(idx) RGMII_FER_GMII(idx) + +/* RGMIIx_SSR */ +#define RGMII_SSR_MASK(idx) (0x7 << ((idx) * 8)) +#define RGMII_SSR_10(idx) (0x1 << ((idx) * 8)) +#define RGMII_SSR_100(idx) (0x2 << ((idx) * 8)) +#define RGMII_SSR_1000(idx) (0x4 << ((idx) * 8)) + +/* RGMII bridge supports only GMII/TBI and RGMII/RTBI PHYs */ +static inline int rgmii_valid_mode(int phy_mode) +{ + return phy_interface_mode_is_rgmii(phy_mode) || + phy_mode == PHY_INTERFACE_MODE_GMII || + phy_mode == PHY_INTERFACE_MODE_MII || + phy_mode == PHY_INTERFACE_MODE_TBI || + phy_mode 
== PHY_INTERFACE_MODE_RTBI; +} + +static inline u32 rgmii_mode_mask(int mode, int input) +{ + switch (mode) { + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + return RGMII_FER_RGMII(input); + case PHY_INTERFACE_MODE_TBI: + return RGMII_FER_TBI(input); + case PHY_INTERFACE_MODE_GMII: + return RGMII_FER_GMII(input); + case PHY_INTERFACE_MODE_MII: + return RGMII_FER_MII(input); + case PHY_INTERFACE_MODE_RTBI: + return RGMII_FER_RTBI(input); + default: + BUG(); + } +} + +int rgmii_attach(struct platform_device *ofdev, int input, int mode) +{ + struct rgmii_instance *dev = platform_get_drvdata(ofdev); + struct rgmii_regs __iomem *p = dev->base; + + RGMII_DBG(dev, "attach(%d)" NL, input); + + /* Check if we need to attach to a RGMII */ + if (input < 0 || !rgmii_valid_mode(mode)) { + printk(KERN_ERR "%pOF: unsupported settings !\n", + ofdev->dev.of_node); + return -ENODEV; + } + + mutex_lock(&dev->lock); + + /* Enable this input */ + out_be32(&p->fer, in_be32(&p->fer) | rgmii_mode_mask(mode, input)); + + printk(KERN_NOTICE "%pOF: input %d in %s mode\n", + ofdev->dev.of_node, input, phy_modes(mode)); + + ++dev->users; + + mutex_unlock(&dev->lock); + + return 0; +} + +void rgmii_set_speed(struct platform_device *ofdev, int input, int speed) +{ + struct rgmii_instance *dev = platform_get_drvdata(ofdev); + struct rgmii_regs __iomem *p = dev->base; + u32 ssr; + + mutex_lock(&dev->lock); + + ssr = in_be32(&p->ssr) & ~RGMII_SSR_MASK(input); + + RGMII_DBG(dev, "speed(%d, %d)" NL, input, speed); + + if (speed == SPEED_1000) + ssr |= RGMII_SSR_1000(input); + else if (speed == SPEED_100) + ssr |= RGMII_SSR_100(input); + else if (speed == SPEED_10) + ssr |= RGMII_SSR_10(input); + + out_be32(&p->ssr, ssr); + + mutex_unlock(&dev->lock); +} + +void rgmii_get_mdio(struct platform_device *ofdev, int input) +{ + struct rgmii_instance *dev = platform_get_drvdata(ofdev); + struct rgmii_regs 
__iomem *p = dev->base; + u32 fer; + + RGMII_DBG2(dev, "get_mdio(%d)" NL, input); + + if (!(dev->flags & EMAC_RGMII_FLAG_HAS_MDIO)) + return; + + mutex_lock(&dev->lock); + + fer = in_be32(&p->fer); + fer |= 0x00080000u >> input; + out_be32(&p->fer, fer); + (void)in_be32(&p->fer); + + DBG2(dev, " fer = 0x%08x\n", fer); +} + +void rgmii_put_mdio(struct platform_device *ofdev, int input) +{ + struct rgmii_instance *dev = platform_get_drvdata(ofdev); + struct rgmii_regs __iomem *p = dev->base; + u32 fer; + + RGMII_DBG2(dev, "put_mdio(%d)" NL, input); + + if (!(dev->flags & EMAC_RGMII_FLAG_HAS_MDIO)) + return; + + fer = in_be32(&p->fer); + fer &= ~(0x00080000u >> input); + out_be32(&p->fer, fer); + (void)in_be32(&p->fer); + + DBG2(dev, " fer = 0x%08x\n", fer); + + mutex_unlock(&dev->lock); +} + +void rgmii_detach(struct platform_device *ofdev, int input) +{ + struct rgmii_instance *dev = platform_get_drvdata(ofdev); + struct rgmii_regs __iomem *p; + + BUG_ON(!dev || dev->users == 0); + p = dev->base; + + mutex_lock(&dev->lock); + + RGMII_DBG(dev, "detach(%d)" NL, input); + + /* Disable this input */ + out_be32(&p->fer, in_be32(&p->fer) & ~RGMII_FER_MASK(input)); + + --dev->users; + + mutex_unlock(&dev->lock); +} + +int rgmii_get_regs_len(struct platform_device *ofdev) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct rgmii_regs); +} + +void *rgmii_dump_regs(struct platform_device *ofdev, void *buf) +{ + struct rgmii_instance *dev = platform_get_drvdata(ofdev); + struct emac_ethtool_regs_subhdr *hdr = buf; + struct rgmii_regs *regs = (struct rgmii_regs *)(hdr + 1); + + hdr->version = 0; + hdr->index = 0; /* for now, are there chips with more than one + * rgmii ? 
if yes, then we'll add a cell_index + * like we do for emac + */ + memcpy_fromio(regs, dev->base, sizeof(struct rgmii_regs)); + return regs + 1; +} + + +/* + * Probe an RGMII bridge: allocate its instance, map the registers from + * address resource 0 of the OF node, record MDIO support, disable all + * inputs and publish the instance as driver data. + * Returns 0 on success, -ENOMEM or -ENXIO on failure. + */ +static int rgmii_probe(struct platform_device *ofdev) +{ + struct device_node *np = ofdev->dev.of_node; + struct rgmii_instance *dev; + struct resource regs; + int rc; + + rc = -ENOMEM; + dev = kzalloc(sizeof(struct rgmii_instance), GFP_KERNEL); + if (dev == NULL) + goto err_gone; + + mutex_init(&dev->lock); + dev->ofdev = ofdev; + + rc = -ENXIO; + if (of_address_to_resource(np, 0, &regs)) { + printk(KERN_ERR "%pOF: Can't get registers address\n", np); + goto err_free; + } + + rc = -ENOMEM; + dev->base = (struct rgmii_regs __iomem *)ioremap(regs.start, + sizeof(struct rgmii_regs)); + if (dev->base == NULL) { + printk(KERN_ERR "%pOF: Can't map device registers!\n", np); + goto err_free; + } + + /* Check for RGMII flags */ + if (of_get_property(ofdev->dev.of_node, "has-mdio", NULL)) + dev->flags |= EMAC_RGMII_FLAG_HAS_MDIO; + + /* CAB lacks the right properties, fix this up */ + if (of_device_is_compatible(ofdev->dev.of_node, "ibm,rgmii-axon")) + dev->flags |= EMAC_RGMII_FLAG_HAS_MDIO; + + DBG2(dev, " Boot FER = 0x%08x, SSR = 0x%08x\n", + in_be32(&dev->base->fer), in_be32(&dev->base->ssr)); + + /* Disable all inputs by default */ + out_be32(&dev->base->fer, 0); + + printk(KERN_INFO + "RGMII %pOF initialized with%s MDIO support\n", + ofdev->dev.of_node, + (dev->flags & EMAC_RGMII_FLAG_HAS_MDIO) ?
"" : "out"); + + wmb(); + platform_set_drvdata(ofdev, dev); + + return 0; + + err_free: + kfree(dev); + err_gone: + return rc; +} + +static int rgmii_remove(struct platform_device *ofdev) +{ + struct rgmii_instance *dev = platform_get_drvdata(ofdev); + + WARN_ON(dev->users != 0); + + iounmap(dev->base); + kfree(dev); + + return 0; +} + +static const struct of_device_id rgmii_match[] = +{ + { + .compatible = "ibm,rgmii", + }, + { + .type = "emac-rgmii", + }, + {}, +}; + +static struct platform_driver rgmii_driver = { + .driver = { + .name = "emac-rgmii", + .of_match_table = rgmii_match, + }, + .probe = rgmii_probe, + .remove = rgmii_remove, +}; + +int __init rgmii_init(void) +{ + return platform_driver_register(&rgmii_driver); +} + +void rgmii_exit(void) +{ + platform_driver_unregister(&rgmii_driver); +} diff --git a/drivers/net/ethernet/ibm/emac/rgmii.h b/drivers/net/ethernet/ibm/emac/rgmii.h new file mode 100644 index 000000000..8e4e36eed --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/rgmii.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * drivers/net/ethernet/ibm/emac/rgmii.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, RGMII bridge support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Based on ocp_zmii.h/ibm_emac_zmii.h + * Armin Kuster akuster@mvista.com + * + * Copyright 2004 MontaVista Software, Inc. + * Matt Porter <mporter@kernel.crashing.org> + * + * Copyright (c) 2004, 2005 Zultys Technologies.
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + */ + +#ifndef __IBM_NEWEMAC_RGMII_H +#define __IBM_NEWEMAC_RGMII_H + +/* RGMII bridge type */ +#define RGMII_STANDARD 0 +#define RGMII_AXON 1 + +/* RGMII bridge */ +struct rgmii_regs { + u32 fer; /* Function enable register */ + u32 ssr; /* Speed select register */ +}; + +/* RGMII device */ +struct rgmii_instance { + struct rgmii_regs __iomem *base; + + /* RGMII bridge flags */ + int flags; +#define EMAC_RGMII_FLAG_HAS_MDIO 0x00000001 + + /* Only one EMAC whacks us at a time */ + struct mutex lock; + + /* number of EMACs using this RGMII bridge */ + int users; + + /* OF device instance */ + struct platform_device *ofdev; +}; + +#ifdef CONFIG_IBM_EMAC_RGMII + +int rgmii_init(void); +void rgmii_exit(void); +int rgmii_attach(struct platform_device *ofdev, int input, int mode); +void rgmii_detach(struct platform_device *ofdev, int input); +void rgmii_get_mdio(struct platform_device *ofdev, int input); +void rgmii_put_mdio(struct platform_device *ofdev, int input); +void rgmii_set_speed(struct platform_device *ofdev, int input, int speed); +int rgmii_get_regs_len(struct platform_device *ofdev); +void *rgmii_dump_regs(struct platform_device *ofdev, void *buf); + +#else + +# define rgmii_init() 0 +# define rgmii_exit() do { } while(0) +# define rgmii_attach(x,y,z) (-ENXIO) +# define rgmii_detach(x,y) do { } while(0) +# define rgmii_get_mdio(o,i) do { } while (0) +# define rgmii_put_mdio(o,i) do { } while (0) +# define rgmii_set_speed(x,y,z) do { } while(0) +# define rgmii_get_regs_len(x) 0 +# define rgmii_dump_regs(x,buf) (buf) +#endif /* !CONFIG_IBM_EMAC_RGMII */ + +#endif /* __IBM_NEWEMAC_RGMII_H */ diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c new file mode 100644 index 000000000..008bbdaf1 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/tah.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * drivers/net/ethernet/ibm/emac/tah.c + * 
+ * Driver for PowerPC 4xx on-chip ethernet controller, TAH support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright 2004 MontaVista Software, Inc. + * Matt Porter <mporter@kernel.crashing.org> + * + * Copyright (c) 2005 Eugene Surovegin <ebs@ebshome.net> + */ +#include <linux/of_address.h> +#include <asm/io.h> + +#include "emac.h" +#include "core.h" + +int tah_attach(struct platform_device *ofdev, int channel) +{ + struct tah_instance *dev = platform_get_drvdata(ofdev); + + mutex_lock(&dev->lock); + /* Reset has been done at probe() time... nothing else to do for now */ + ++dev->users; + mutex_unlock(&dev->lock); + + return 0; +} + +void tah_detach(struct platform_device *ofdev, int channel) +{ + struct tah_instance *dev = platform_get_drvdata(ofdev); + + mutex_lock(&dev->lock); + --dev->users; + mutex_unlock(&dev->lock); +} + +void tah_reset(struct platform_device *ofdev) +{ + struct tah_instance *dev = platform_get_drvdata(ofdev); + struct tah_regs __iomem *p = dev->base; + int n; + + /* Reset TAH */ + out_be32(&p->mr, TAH_MR_SR); + n = 100; + while ((in_be32(&p->mr) & TAH_MR_SR) && n) + --n; + + if (unlikely(!n)) + printk(KERN_ERR "%pOF: reset timeout\n", ofdev->dev.of_node); + + /* 10KB TAH TX FIFO accommodates the max MTU of 9000 */ + out_be32(&p->mr, + TAH_MR_CVR | TAH_MR_ST_768 | TAH_MR_TFS_10KB | TAH_MR_DTFP | + TAH_MR_DIG); +} + +int tah_get_regs_len(struct platform_device *ofdev) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct tah_regs); +} + +void *tah_dump_regs(struct platform_device *ofdev, void *buf) +{ + struct tah_instance *dev = platform_get_drvdata(ofdev); + struct emac_ethtool_regs_subhdr *hdr = buf; + struct tah_regs *regs = (struct tah_regs *)(hdr + 1); + + hdr->version = 0; + hdr->index = 0; /* for now, are there chips with more than one + * zmii ? 
if yes, then we'll add a cell_index + * like we do for emac + */ + memcpy_fromio(regs, dev->base, sizeof(struct tah_regs)); + return regs + 1; +} + +/* + * Probe a TAH: allocate its instance, map the registers from address + * resource 0 of the OF node, publish the instance as driver data and + * reset the unit. + * Returns 0 on success, -ENOMEM or -ENXIO on failure. + */ +static int tah_probe(struct platform_device *ofdev) +{ + struct device_node *np = ofdev->dev.of_node; + struct tah_instance *dev; + struct resource regs; + int rc; + + rc = -ENOMEM; + dev = kzalloc(sizeof(struct tah_instance), GFP_KERNEL); + if (dev == NULL) + goto err_gone; + + mutex_init(&dev->lock); + dev->ofdev = ofdev; + + rc = -ENXIO; + if (of_address_to_resource(np, 0, &regs)) { + printk(KERN_ERR "%pOF: Can't get registers address\n", np); + goto err_free; + } + + rc = -ENOMEM; + dev->base = (struct tah_regs __iomem *)ioremap(regs.start, + sizeof(struct tah_regs)); + if (dev->base == NULL) { + printk(KERN_ERR "%pOF: Can't map device registers!\n", np); + goto err_free; + } + + platform_set_drvdata(ofdev, dev); + + /* Initialize TAH and enable IPv4 checksum verification, no TSO yet */ + tah_reset(ofdev); + + printk(KERN_INFO "TAH %pOF initialized\n", ofdev->dev.of_node); + wmb(); + + return 0; + + err_free: + kfree(dev); + err_gone: + return rc; +} + +static int tah_remove(struct platform_device *ofdev) +{ + struct tah_instance *dev = platform_get_drvdata(ofdev); + + WARN_ON(dev->users != 0); + + iounmap(dev->base); + kfree(dev); + + return 0; +} + +static const struct of_device_id tah_match[] = +{ + { + .compatible = "ibm,tah", + }, + /* For backward compat with old DT */ + { + .type = "tah", + }, + {}, +}; + +static struct platform_driver tah_driver = { + .driver = { + .name = "emac-tah", + .of_match_table = tah_match, + }, + .probe = tah_probe, + .remove = tah_remove, +}; + +int __init tah_init(void) +{ + return platform_driver_register(&tah_driver); +} + +void tah_exit(void) +{ + platform_driver_unregister(&tah_driver); +} diff --git a/drivers/net/ethernet/ibm/emac/tah.h b/drivers/net/ethernet/ibm/emac/tah.h new file mode 100644 index 000000000..86c2b6b9d --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/tah.h
@@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * drivers/net/ethernet/ibm/emac/tah.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, TAH support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright 2004 MontaVista Software, Inc. + * Matt Porter <mporter@kernel.crashing.org> + * + * Copyright (c) 2005 Eugene Surovegin <ebs@ebshome.net> + */ + +#ifndef __IBM_NEWEMAC_TAH_H +#define __IBM_NEWEMAC_TAH_H + +/* TAH */ +struct tah_regs { + u32 revid; + u32 pad[3]; + u32 mr; + u32 ssr0; + u32 ssr1; + u32 ssr2; + u32 ssr3; + u32 ssr4; + u32 ssr5; + u32 tsr; +}; + + +/* TAH device */ +struct tah_instance { + struct tah_regs __iomem *base; + + /* Only one EMAC whacks us at a time */ + struct mutex lock; + + /* number of EMACs using this TAH */ + int users; + + /* OF device instance */ + struct platform_device *ofdev; +}; + + +/* TAH engine */ +#define TAH_MR_CVR 0x80000000 +#define TAH_MR_SR 0x40000000 +#define TAH_MR_ST_256 0x01000000 +#define TAH_MR_ST_512 0x02000000 +#define TAH_MR_ST_768 0x03000000 +#define TAH_MR_ST_1024 0x04000000 +#define TAH_MR_ST_1280 0x05000000 +#define TAH_MR_ST_1536 0x06000000 +#define TAH_MR_TFS_16KB 0x00000000 +#define TAH_MR_TFS_2KB 0x00200000 +#define TAH_MR_TFS_4KB 0x00400000 +#define TAH_MR_TFS_6KB 0x00600000 +#define TAH_MR_TFS_8KB 0x00800000 +#define TAH_MR_TFS_10KB 0x00a00000 +#define TAH_MR_DTFP 0x00100000 +#define TAH_MR_DIG 0x00080000 + +#ifdef CONFIG_IBM_EMAC_TAH + +int tah_init(void); +void tah_exit(void); +int tah_attach(struct platform_device *ofdev, int channel); +void tah_detach(struct platform_device *ofdev, int channel); +void tah_reset(struct platform_device *ofdev); +int tah_get_regs_len(struct platform_device *ofdev); +void *tah_dump_regs(struct platform_device *ofdev, void *buf); + +#else + +# define tah_init() 0 +# define tah_exit() do { } while(0) +# define tah_attach(x,y) (-ENXIO) 
+# define tah_detach(x,y) do { } while(0) +# define tah_reset(x) do { } while(0) +# define tah_get_regs_len(x) 0 +# define tah_dump_regs(x,buf) (buf) + +#endif /* !CONFIG_IBM_EMAC_TAH */ + +#endif /* __IBM_NEWEMAC_TAH_H */ diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c new file mode 100644 index 000000000..57a25c7a9 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/zmii.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * drivers/net/ethernet/ibm/emac/zmii.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, ZMII bridge support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Armin Kuster <akuster@mvista.com> + * Copyright 2001 MontaVista Softare Inc. + */ +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/ethtool.h> +#include <linux/of_address.h> +#include <asm/io.h> + +#include "emac.h" +#include "core.h" + +/* ZMIIx_FER */ +#define ZMII_FER_MDI(idx) (0x80000000 >> ((idx) * 4)) +#define ZMII_FER_MDI_ALL (ZMII_FER_MDI(0) | ZMII_FER_MDI(1) | \ + ZMII_FER_MDI(2) | ZMII_FER_MDI(3)) + +#define ZMII_FER_SMII(idx) (0x40000000 >> ((idx) * 4)) +#define ZMII_FER_RMII(idx) (0x20000000 >> ((idx) * 4)) +#define ZMII_FER_MII(idx) (0x10000000 >> ((idx) * 4)) + +/* ZMIIx_SSR */ +#define ZMII_SSR_SCI(idx) (0x40000000 >> ((idx) * 4)) +#define ZMII_SSR_FSS(idx) (0x20000000 >> ((idx) * 4)) +#define ZMII_SSR_SP(idx) (0x10000000 >> ((idx) * 4)) + +/* ZMII only supports MII, RMII and SMII + * we also support autodetection for backward compatibility + */ +static inline int zmii_valid_mode(int mode) +{ + return mode == PHY_INTERFACE_MODE_MII || + mode == PHY_INTERFACE_MODE_RMII || + mode == PHY_INTERFACE_MODE_SMII || + mode == 
PHY_INTERFACE_MODE_NA; +} + +static inline const char *zmii_mode_name(int mode) +{ + switch (mode) { + case PHY_INTERFACE_MODE_MII: + return "MII"; + case PHY_INTERFACE_MODE_RMII: + return "RMII"; + case PHY_INTERFACE_MODE_SMII: + return "SMII"; + default: + BUG(); + } +} + +static inline u32 zmii_mode_mask(int mode, int input) +{ + switch (mode) { + case PHY_INTERFACE_MODE_MII: + return ZMII_FER_MII(input); + case PHY_INTERFACE_MODE_RMII: + return ZMII_FER_RMII(input); + case PHY_INTERFACE_MODE_SMII: + return ZMII_FER_SMII(input); + default: + return 0; + } +} + +int zmii_attach(struct platform_device *ofdev, int input, + phy_interface_t *mode) +{ + struct zmii_instance *dev = platform_get_drvdata(ofdev); + struct zmii_regs __iomem *p = dev->base; + + ZMII_DBG(dev, "init(%d, %d)" NL, input, *mode); + + if (!zmii_valid_mode(*mode)) { + /* Probably an EMAC connected to RGMII, + * but it still may need ZMII for MDIO so + * we don't fail here. + */ + dev->users++; + return 0; + } + + mutex_lock(&dev->lock); + + /* Autodetect ZMII mode if not specified. + * This is only for backward compatibility with the old driver. + * Please, always specify PHY mode in your board port to avoid + * any surprises. 
+ */ + if (dev->mode == PHY_INTERFACE_MODE_NA) { + if (*mode == PHY_INTERFACE_MODE_NA) { + u32 r = dev->fer_save; + + ZMII_DBG(dev, "autodetecting mode, FER = 0x%08x" NL, r); + + if (r & (ZMII_FER_MII(0) | ZMII_FER_MII(1))) + dev->mode = PHY_INTERFACE_MODE_MII; + else if (r & (ZMII_FER_RMII(0) | ZMII_FER_RMII(1))) + dev->mode = PHY_INTERFACE_MODE_RMII; + else + dev->mode = PHY_INTERFACE_MODE_SMII; + } else { + dev->mode = *mode; + } + printk(KERN_NOTICE "%pOF: bridge in %s mode\n", + ofdev->dev.of_node, + zmii_mode_name(dev->mode)); + } else { + /* All inputs must use the same mode */ + if (*mode != PHY_INTERFACE_MODE_NA && *mode != dev->mode) { + printk(KERN_ERR + "%pOF: invalid mode %d specified for input %d\n", + ofdev->dev.of_node, *mode, input); + mutex_unlock(&dev->lock); + return -EINVAL; + } + } + + /* Report back correct PHY mode, + * it may be used during PHY initialization. + */ + *mode = dev->mode; + + /* Enable this input */ + out_be32(&p->fer, in_be32(&p->fer) | zmii_mode_mask(dev->mode, input)); + ++dev->users; + + mutex_unlock(&dev->lock); + + return 0; +} + +void zmii_get_mdio(struct platform_device *ofdev, int input) +{ + struct zmii_instance *dev = platform_get_drvdata(ofdev); + u32 fer; + + ZMII_DBG2(dev, "get_mdio(%d)" NL, input); + + mutex_lock(&dev->lock); + + fer = in_be32(&dev->base->fer) & ~ZMII_FER_MDI_ALL; + out_be32(&dev->base->fer, fer | ZMII_FER_MDI(input)); +} + +void zmii_put_mdio(struct platform_device *ofdev, int input) +{ + struct zmii_instance *dev = platform_get_drvdata(ofdev); + + ZMII_DBG2(dev, "put_mdio(%d)" NL, input); + mutex_unlock(&dev->lock); +} + + +void zmii_set_speed(struct platform_device *ofdev, int input, int speed) +{ + struct zmii_instance *dev = platform_get_drvdata(ofdev); + u32 ssr; + + mutex_lock(&dev->lock); + + ssr = in_be32(&dev->base->ssr); + + ZMII_DBG(dev, "speed(%d, %d)" NL, input, speed); + + if (speed == SPEED_100) + ssr |= ZMII_SSR_SP(input); + else + ssr &= ~ZMII_SSR_SP(input); + + 
out_be32(&dev->base->ssr, ssr); + + mutex_unlock(&dev->lock); +} + +void zmii_detach(struct platform_device *ofdev, int input) +{ + struct zmii_instance *dev = platform_get_drvdata(ofdev); + + BUG_ON(!dev || dev->users == 0); + + mutex_lock(&dev->lock); + + ZMII_DBG(dev, "detach(%d)" NL, input); + + /* Disable this input */ + out_be32(&dev->base->fer, + in_be32(&dev->base->fer) & ~zmii_mode_mask(dev->mode, input)); + + --dev->users; + + mutex_unlock(&dev->lock); +} + +int zmii_get_regs_len(struct platform_device *ofdev) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct zmii_regs); +} + +void *zmii_dump_regs(struct platform_device *ofdev, void *buf) +{ + struct zmii_instance *dev = platform_get_drvdata(ofdev); + struct emac_ethtool_regs_subhdr *hdr = buf; + struct zmii_regs *regs = (struct zmii_regs *)(hdr + 1); + + hdr->version = 0; + hdr->index = 0; /* for now, are there chips with more than one + * zmii ? if yes, then we'll add a cell_index + * like we do for emac + */ + memcpy_fromio(regs, dev->base, sizeof(struct zmii_regs)); + return regs + 1; +} + +static int zmii_probe(struct platform_device *ofdev) +{ + struct device_node *np = ofdev->dev.of_node; + struct zmii_instance *dev; + struct resource regs; + int rc; + + rc = -ENOMEM; + dev = kzalloc(sizeof(struct zmii_instance), GFP_KERNEL); + if (dev == NULL) + goto err_gone; + + mutex_init(&dev->lock); + dev->ofdev = ofdev; + dev->mode = PHY_INTERFACE_MODE_NA; + + rc = -ENXIO; + if (of_address_to_resource(np, 0, &regs)) { + printk(KERN_ERR "%pOF: Can't get registers address\n", np); + goto err_free; + } + + rc = -ENOMEM; + dev->base = (struct zmii_regs __iomem *)ioremap(regs.start, + sizeof(struct zmii_regs)); + if (dev->base == NULL) { + printk(KERN_ERR "%pOF: Can't map device registers!\n", np); + goto err_free; + } + + /* We may need FER value for autodetection later */ + dev->fer_save = in_be32(&dev->base->fer); + + /* Disable all inputs by default */ + out_be32(&dev->base->fer, 0); + +
printk(KERN_INFO "ZMII %pOF initialized\n", ofdev->dev.of_node); + wmb(); + platform_set_drvdata(ofdev, dev); + + return 0; + + err_free: + kfree(dev); + err_gone: + return rc; +} + +static int zmii_remove(struct platform_device *ofdev) +{ + struct zmii_instance *dev = platform_get_drvdata(ofdev); + + WARN_ON(dev->users != 0); + + iounmap(dev->base); + kfree(dev); + + return 0; +} + +static const struct of_device_id zmii_match[] = +{ + { + .compatible = "ibm,zmii", + }, + /* For backward compat with old DT */ + { + .type = "emac-zmii", + }, + {}, +}; + +static struct platform_driver zmii_driver = { + .driver = { + .name = "emac-zmii", + .of_match_table = zmii_match, + }, + .probe = zmii_probe, + .remove = zmii_remove, +}; + +int __init zmii_init(void) +{ + return platform_driver_register(&zmii_driver); +} + +void zmii_exit(void) +{ + platform_driver_unregister(&zmii_driver); +} diff --git a/drivers/net/ethernet/ibm/emac/zmii.h b/drivers/net/ethernet/ibm/emac/zmii.h new file mode 100644 index 000000000..65daedc78 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/zmii.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * drivers/net/ethernet/ibm/emac/zmii.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, ZMII bridge support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Armin Kuster <akuster@mvista.com> + * Copyright 2001 MontaVista Softare Inc. 
+ */ +#ifndef __IBM_NEWEMAC_ZMII_H +#define __IBM_NEWEMAC_ZMII_H + +/* ZMII bridge registers */ +struct zmii_regs { + u32 fer; /* Function enable reg */ + u32 ssr; /* Speed select reg */ + u32 smiirs; /* SMII status reg */ +}; + +/* ZMII device */ +struct zmii_instance { + struct zmii_regs __iomem *base; + + /* Only one EMAC whacks us at a time */ + struct mutex lock; + + /* subset of PHY_MODE_XXXX */ + int mode; + + /* number of EMACs using this ZMII bridge */ + int users; + + /* FER value left by firmware */ + u32 fer_save; + + /* OF device instance */ + struct platform_device *ofdev; +}; + +#ifdef CONFIG_IBM_EMAC_ZMII + +int zmii_init(void); +void zmii_exit(void); +int zmii_attach(struct platform_device *ofdev, int input, + phy_interface_t *mode); +void zmii_detach(struct platform_device *ofdev, int input); +void zmii_get_mdio(struct platform_device *ofdev, int input); +void zmii_put_mdio(struct platform_device *ofdev, int input); +void zmii_set_speed(struct platform_device *ofdev, int input, int speed); +int zmii_get_regs_len(struct platform_device *ocpdev); +void *zmii_dump_regs(struct platform_device *ofdev, void *buf); + +#else +# define zmii_init() 0 +# define zmii_exit() do { } while(0) +# define zmii_attach(x,y,z) (-ENXIO) +# define zmii_detach(x,y) do { } while(0) +# define zmii_get_mdio(x,y) do { } while(0) +# define zmii_put_mdio(x,y) do { } while(0) +# define zmii_set_speed(x,y,z) do { } while(0) +# define zmii_get_regs_len(x) 0 +# define zmii_dump_regs(x,buf) (buf) +#endif /* !CONFIG_IBM_EMAC_ZMII */ + +#endif /* __IBM_NEWEMAC_ZMII_H */ diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c new file mode 100644 index 000000000..1d21a2812 --- /dev/null +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -0,0 +1,1987 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * IBM Power Virtual Ethernet Device Driver + * + * Copyright (C) IBM Corporation, 2003, 2010 + * + * Authors: Dave Larson <larson1@us.ibm.com> + * Santiago Leon 
<santil@linux.vnet.ibm.com> + * Brian King <brking@linux.vnet.ibm.com> + * Robert Jennings <rcj@linux.vnet.ibm.com> + * Anton Blanchard <anton@au.ibm.com> + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/mm.h> +#include <linux/pm.h> +#include <linux/ethtool.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/slab.h> +#include <asm/hvcall.h> +#include <linux/atomic.h> +#include <asm/vio.h> +#include <asm/iommu.h> +#include <asm/firmware.h> +#include <net/tcp.h> +#include <net/ip6_checksum.h> + +#include "ibmveth.h" + +static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); +static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter); +static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev); + +static struct kobj_type ktype_veth_pool; + + +static const char ibmveth_driver_name[] = "ibmveth"; +static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver"; +#define ibmveth_driver_version "1.06" + +MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>"); +MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(ibmveth_driver_version); + +static unsigned int tx_copybreak __read_mostly = 128; +module_param(tx_copybreak, uint, 0644); +MODULE_PARM_DESC(tx_copybreak, + "Maximum size of packet that is copied to a new buffer on transmit"); + +static unsigned int rx_copybreak __read_mostly = 128; +module_param(rx_copybreak, uint, 0644); +MODULE_PARM_DESC(rx_copybreak, + "Maximum size of packet that is copied to a new buffer on receive"); + +static unsigned int rx_flush __read_mostly = 0; +module_param(rx_flush, uint, 0644); +MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use"); + 
+static bool old_large_send __read_mostly; +module_param(old_large_send, bool, 0444); +MODULE_PARM_DESC(old_large_send, + "Use old large send method on firmware that supports the new method"); + +struct ibmveth_stat { + char name[ETH_GSTRING_LEN]; + int offset; +}; + +#define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat) +#define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off)) + +static struct ibmveth_stat ibmveth_stats[] = { + { "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) }, + { "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) }, + { "replenish_add_buff_failure", + IBMVETH_STAT_OFF(replenish_add_buff_failure) }, + { "replenish_add_buff_success", + IBMVETH_STAT_OFF(replenish_add_buff_success) }, + { "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer) }, + { "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer) }, + { "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed) }, + { "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) }, + { "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) }, + { "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) }, + { "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) }, + { "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) }, + { "fw_enabled_large_send", IBMVETH_STAT_OFF(fw_large_send_support) } +}; + +/* simple methods of getting data from the current rxq entry */ +static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter) +{ + return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off); +} + +static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter) +{ + return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_TOGGLE) >> + IBMVETH_RXQ_TOGGLE_SHIFT; +} + +static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter *adapter) +{ + return ibmveth_rxq_toggle(adapter) == adapter->rx_queue.toggle; +} + +static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter *adapter) +{ + return 
ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_VALID; +} + +static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter) +{ + return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK; +} + +static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter) +{ + return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT; +} + +static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter) +{ + return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length); +} + +static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter) +{ + return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD; +} + +static unsigned int ibmveth_real_max_tx_queues(void) +{ + unsigned int n_cpu = num_online_cpus(); + + return min(n_cpu, IBMVETH_MAX_QUEUES); +} + +/* setup the initial settings for a buffer pool */ +static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool, + u32 pool_index, u32 pool_size, + u32 buff_size, u32 pool_active) +{ + pool->size = pool_size; + pool->index = pool_index; + pool->buff_size = buff_size; + pool->threshold = pool_size * 7 / 8; + pool->active = pool_active; +} + +/* allocate and setup a buffer pool - called during open */ +static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool) +{ + int i; + + pool->free_map = kmalloc_array(pool->size, sizeof(u16), GFP_KERNEL); + + if (!pool->free_map) + return -1; + + pool->dma_addr = kcalloc(pool->size, sizeof(dma_addr_t), GFP_KERNEL); + if (!pool->dma_addr) { + kfree(pool->free_map); + pool->free_map = NULL; + return -1; + } + + pool->skbuff = kcalloc(pool->size, sizeof(void *), GFP_KERNEL); + + if (!pool->skbuff) { + kfree(pool->dma_addr); + pool->dma_addr = NULL; + + kfree(pool->free_map); + pool->free_map = NULL; + return -1; + } + + for (i = 0; i < pool->size; ++i) + pool->free_map[i] = i; + + atomic_set(&pool->available, 0); + pool->producer_index = 0; + pool->consumer_index = 0; + + return 0; +} + +static inline void
ibmveth_flush_buffer(void *addr, unsigned long length) +{ + unsigned long offset; + + for (offset = 0; offset < length; offset += SMP_CACHE_BYTES) + asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset)); +} + +/* replenish the buffers for a pool. note that we don't need to + * skb_reserve these since they are used for incoming... + */ +static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, + struct ibmveth_buff_pool *pool) +{ + u32 i; + u32 count = pool->size - atomic_read(&pool->available); + u32 buffers_added = 0; + struct sk_buff *skb; + unsigned int free_index, index; + u64 correlator; + unsigned long lpar_rc; + dma_addr_t dma_addr; + + mb(); + + for (i = 0; i < count; ++i) { + union ibmveth_buf_desc desc; + + skb = netdev_alloc_skb(adapter->netdev, pool->buff_size); + + if (!skb) { + netdev_dbg(adapter->netdev, + "replenish: unable to allocate skb\n"); + adapter->replenish_no_mem++; + break; + } + + free_index = pool->consumer_index; + pool->consumer_index++; + if (pool->consumer_index >= pool->size) + pool->consumer_index = 0; + index = pool->free_map[free_index]; + + BUG_ON(index == IBM_VETH_INVALID_MAP); + BUG_ON(pool->skbuff[index] != NULL); + + dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, + pool->buff_size, DMA_FROM_DEVICE); + + if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) + goto failure; + + pool->free_map[free_index] = IBM_VETH_INVALID_MAP; + pool->dma_addr[index] = dma_addr; + pool->skbuff[index] = skb; + + correlator = ((u64)pool->index << 32) | index; + *(u64 *)skb->data = correlator; + + desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size; + desc.fields.address = dma_addr; + + if (rx_flush) { + unsigned int len = min(pool->buff_size, + adapter->netdev->mtu + + IBMVETH_BUFF_OH); + ibmveth_flush_buffer(skb->data, len); + } + lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, + desc.desc); + + if (lpar_rc != H_SUCCESS) { + goto failure; + } else { + buffers_added++; + 
adapter->replenish_add_buff_success++; + } + } + + mb(); + atomic_add(buffers_added, &(pool->available)); + return; + +failure: + pool->free_map[free_index] = index; + pool->skbuff[index] = NULL; + if (pool->consumer_index == 0) + pool->consumer_index = pool->size - 1; + else + pool->consumer_index--; + if (!dma_mapping_error(&adapter->vdev->dev, dma_addr)) + dma_unmap_single(&adapter->vdev->dev, + pool->dma_addr[index], pool->buff_size, + DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + adapter->replenish_add_buff_failure++; + + mb(); + atomic_add(buffers_added, &(pool->available)); +} + +/* + * The final 8 bytes of the buffer list is a counter of frames dropped + * because there was not a buffer in the buffer list capable of holding + * the frame. + */ +static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter) +{ + __be64 *p = adapter->buffer_list_addr + 4096 - 8; + + adapter->rx_no_buffer = be64_to_cpup(p); +} + +/* replenish routine */ +static void ibmveth_replenish_task(struct ibmveth_adapter *adapter) +{ + int i; + + adapter->replenish_task_cycles++; + + for (i = (IBMVETH_NUM_BUFF_POOLS - 1); i >= 0; i--) { + struct ibmveth_buff_pool *pool = &adapter->rx_buff_pool[i]; + + if (pool->active && + (atomic_read(&pool->available) < pool->threshold)) + ibmveth_replenish_buffer_pool(adapter, pool); + } + + ibmveth_update_rx_no_buffer(adapter); +} + +/* empty and free a buffer pool - also used to do cleanup in error paths */ +static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter, + struct ibmveth_buff_pool *pool) +{ + int i; + + kfree(pool->free_map); + pool->free_map = NULL; + + if (pool->skbuff && pool->dma_addr) { + for (i = 0; i < pool->size; ++i) { + struct sk_buff *skb = pool->skbuff[i]; + if (skb) { + dma_unmap_single(&adapter->vdev->dev, + pool->dma_addr[i], + pool->buff_size, + DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + pool->skbuff[i] = NULL; + } + } + } + + if (pool->dma_addr) { + kfree(pool->dma_addr); + pool->dma_addr =
NULL; + } + + if (pool->skbuff) { + kfree(pool->skbuff); + pool->skbuff = NULL; + } +} + +/* remove a buffer from a pool */ +static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, + u64 correlator) +{ + unsigned int pool = correlator >> 32; + unsigned int index = correlator & 0xffffffffUL; + unsigned int free_index; + struct sk_buff *skb; + + BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); + BUG_ON(index >= adapter->rx_buff_pool[pool].size); + + skb = adapter->rx_buff_pool[pool].skbuff[index]; + + BUG_ON(skb == NULL); + + adapter->rx_buff_pool[pool].skbuff[index] = NULL; + + dma_unmap_single(&adapter->vdev->dev, + adapter->rx_buff_pool[pool].dma_addr[index], + adapter->rx_buff_pool[pool].buff_size, + DMA_FROM_DEVICE); + + free_index = adapter->rx_buff_pool[pool].producer_index; + adapter->rx_buff_pool[pool].producer_index++; + if (adapter->rx_buff_pool[pool].producer_index >= + adapter->rx_buff_pool[pool].size) + adapter->rx_buff_pool[pool].producer_index = 0; + adapter->rx_buff_pool[pool].free_map[free_index] = index; + + mb(); + + atomic_dec(&(adapter->rx_buff_pool[pool].available)); +} + +/* get the current buffer on the rx queue */ +static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter) +{ + u64 correlator = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator; + unsigned int pool = correlator >> 32; + unsigned int index = correlator & 0xffffffffUL; + + BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); + BUG_ON(index >= adapter->rx_buff_pool[pool].size); + + return adapter->rx_buff_pool[pool].skbuff[index]; +} + +/* recycle the current buffer on the rx queue */ +static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) +{ + u32 q_index = adapter->rx_queue.index; + u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator; + unsigned int pool = correlator >> 32; + unsigned int index = correlator & 0xffffffffUL; + union ibmveth_buf_desc desc; + unsigned long lpar_rc; + int ret = 1; + + 
BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); + BUG_ON(index >= adapter->rx_buff_pool[pool].size); + + if (!adapter->rx_buff_pool[pool].active) { + ibmveth_rxq_harvest_buffer(adapter); + ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]); + goto out; + } + + desc.fields.flags_len = IBMVETH_BUF_VALID | + adapter->rx_buff_pool[pool].buff_size; + desc.fields.address = adapter->rx_buff_pool[pool].dma_addr[index]; + + lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc); + + if (lpar_rc != H_SUCCESS) { + netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed " + "during recycle rc=%ld", lpar_rc); + ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); + ret = 0; + } + + if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { + adapter->rx_queue.index = 0; + adapter->rx_queue.toggle = !adapter->rx_queue.toggle; + } + +out: + return ret; +} + +static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) +{ + ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); + + if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { + adapter->rx_queue.index = 0; + adapter->rx_queue.toggle = !adapter->rx_queue.toggle; + } +} + +static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx) +{ + dma_unmap_single(&adapter->vdev->dev, adapter->tx_ltb_dma[idx], + adapter->tx_ltb_size, DMA_TO_DEVICE); + kfree(adapter->tx_ltb_ptr[idx]); + adapter->tx_ltb_ptr[idx] = NULL; +} + +static int ibmveth_allocate_tx_ltb(struct ibmveth_adapter *adapter, int idx) +{ + adapter->tx_ltb_ptr[idx] = kzalloc(adapter->tx_ltb_size, + GFP_KERNEL); + if (!adapter->tx_ltb_ptr[idx]) { + netdev_err(adapter->netdev, + "unable to allocate tx long term buffer\n"); + return -ENOMEM; + } + adapter->tx_ltb_dma[idx] = dma_map_single(&adapter->vdev->dev, + adapter->tx_ltb_ptr[idx], + adapter->tx_ltb_size, + DMA_TO_DEVICE); + if 
(dma_mapping_error(&adapter->vdev->dev, adapter->tx_ltb_dma[idx])) { + netdev_err(adapter->netdev, + "unable to DMA map tx long term buffer\n"); + kfree(adapter->tx_ltb_ptr[idx]); + adapter->tx_ltb_ptr[idx] = NULL; + return -ENOMEM; + } + + return 0; +} + +static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter, + union ibmveth_buf_desc rxq_desc, u64 mac_address) +{ + int rc, try_again = 1; + + /* + * After a kexec the adapter will still be open, so our attempt to + * open it will fail. So if we get a failure we free the adapter and + * try again, but only once. + */ +retry: + rc = h_register_logical_lan(adapter->vdev->unit_address, + adapter->buffer_list_dma, rxq_desc.desc, + adapter->filter_list_dma, mac_address); + + if (rc != H_SUCCESS && try_again) { + do { + rc = h_free_logical_lan(adapter->vdev->unit_address); + } while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY)); + + try_again = 0; + goto retry; + } + + return rc; +} + +static int ibmveth_open(struct net_device *netdev) +{ + struct ibmveth_adapter *adapter = netdev_priv(netdev); + u64 mac_address; + int rxq_entries = 1; + unsigned long lpar_rc; + int rc; + union ibmveth_buf_desc rxq_desc; + int i; + struct device *dev; + + netdev_dbg(netdev, "open starting\n"); + + napi_enable(&adapter->napi); + + for(i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + rxq_entries += adapter->rx_buff_pool[i].size; + + rc = -ENOMEM; + adapter->buffer_list_addr = (void*) get_zeroed_page(GFP_KERNEL); + if (!adapter->buffer_list_addr) { + netdev_err(netdev, "unable to allocate list pages\n"); + goto out; + } + + adapter->filter_list_addr = (void*) get_zeroed_page(GFP_KERNEL); + if (!adapter->filter_list_addr) { + netdev_err(netdev, "unable to allocate filter pages\n"); + goto out_free_buffer_list; + } + + dev = &adapter->vdev->dev; + + adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) * + rxq_entries; + adapter->rx_queue.queue_addr = + dma_alloc_coherent(dev, adapter->rx_queue.queue_len, + 
&adapter->rx_queue.queue_dma, GFP_KERNEL); + if (!adapter->rx_queue.queue_addr) + goto out_free_filter_list; + + adapter->buffer_list_dma = dma_map_single(dev, + adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, adapter->buffer_list_dma)) { + netdev_err(netdev, "unable to map buffer list pages\n"); + goto out_free_queue_mem; + } + + adapter->filter_list_dma = dma_map_single(dev, + adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, adapter->filter_list_dma)) { + netdev_err(netdev, "unable to map filter list pages\n"); + goto out_unmap_buffer_list; + } + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + if (ibmveth_allocate_tx_ltb(adapter, i)) + goto out_free_tx_ltb; + } + + adapter->rx_queue.index = 0; + adapter->rx_queue.num_slots = rxq_entries; + adapter->rx_queue.toggle = 1; + + mac_address = ether_addr_to_u64(netdev->dev_addr); + + rxq_desc.fields.flags_len = IBMVETH_BUF_VALID | + adapter->rx_queue.queue_len; + rxq_desc.fields.address = adapter->rx_queue.queue_dma; + + netdev_dbg(netdev, "buffer list @ 0x%p\n", adapter->buffer_list_addr); + netdev_dbg(netdev, "filter list @ 0x%p\n", adapter->filter_list_addr); + netdev_dbg(netdev, "receive q @ 0x%p\n", adapter->rx_queue.queue_addr); + + h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); + + lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address); + + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_register_logical_lan failed with %ld\n", + lpar_rc); + netdev_err(netdev, "buffer TCE:0x%llx filter TCE:0x%llx rxq " + "desc:0x%llx MAC:0x%llx\n", + adapter->buffer_list_dma, + adapter->filter_list_dma, + rxq_desc.desc, + mac_address); + rc = -ENONET; + goto out_unmap_filter_list; + } + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { + if (!adapter->rx_buff_pool[i].active) + continue; + if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) { + netdev_err(netdev, "unable to alloc pool\n"); + 
adapter->rx_buff_pool[i].active = 0; + rc = -ENOMEM; + goto out_free_buffer_pools; + } + } + + netdev_dbg(netdev, "registering irq 0x%x\n", netdev->irq); + rc = request_irq(netdev->irq, ibmveth_interrupt, 0, netdev->name, + netdev); + if (rc != 0) { + netdev_err(netdev, "unable to request irq 0x%x, rc %d\n", + netdev->irq, rc); + do { + lpar_rc = h_free_logical_lan(adapter->vdev->unit_address); + } while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY)); + + goto out_free_buffer_pools; + } + + rc = -ENOMEM; + + netdev_dbg(netdev, "initial replenish cycle\n"); + ibmveth_interrupt(netdev->irq, netdev); + + netif_tx_start_all_queues(netdev); + + netdev_dbg(netdev, "open complete\n"); + + return 0; + +out_free_buffer_pools: + while (--i >= 0) { + if (adapter->rx_buff_pool[i].active) + ibmveth_free_buffer_pool(adapter, + &adapter->rx_buff_pool[i]); + } +out_unmap_filter_list: + dma_unmap_single(dev, adapter->filter_list_dma, 4096, + DMA_BIDIRECTIONAL); + +out_free_tx_ltb: + while (--i >= 0) { + ibmveth_free_tx_ltb(adapter, i); + } + +out_unmap_buffer_list: + dma_unmap_single(dev, adapter->buffer_list_dma, 4096, + DMA_BIDIRECTIONAL); +out_free_queue_mem: + dma_free_coherent(dev, adapter->rx_queue.queue_len, + adapter->rx_queue.queue_addr, + adapter->rx_queue.queue_dma); +out_free_filter_list: + free_page((unsigned long)adapter->filter_list_addr); +out_free_buffer_list: + free_page((unsigned long)adapter->buffer_list_addr); +out: + napi_disable(&adapter->napi); + return rc; +} + +static int ibmveth_close(struct net_device *netdev) +{ + struct ibmveth_adapter *adapter = netdev_priv(netdev); + struct device *dev = &adapter->vdev->dev; + long lpar_rc; + int i; + + netdev_dbg(netdev, "close starting\n"); + + napi_disable(&adapter->napi); + + if (!adapter->pool_config) + netif_tx_stop_all_queues(netdev); + + h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); + + do { + lpar_rc = h_free_logical_lan(adapter->vdev->unit_address); + } while (H_IS_LONG_BUSY(lpar_rc) || 
(lpar_rc == H_BUSY)); + + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_free_logical_lan failed with %lx, " + "continuing with close\n", lpar_rc); + } + + free_irq(netdev->irq, netdev); + + ibmveth_update_rx_no_buffer(adapter); + + dma_unmap_single(dev, adapter->buffer_list_dma, 4096, + DMA_BIDIRECTIONAL); + free_page((unsigned long)adapter->buffer_list_addr); + + dma_unmap_single(dev, adapter->filter_list_dma, 4096, + DMA_BIDIRECTIONAL); + free_page((unsigned long)adapter->filter_list_addr); + + dma_free_coherent(dev, adapter->rx_queue.queue_len, + adapter->rx_queue.queue_addr, + adapter->rx_queue.queue_dma); + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + if (adapter->rx_buff_pool[i].active) + ibmveth_free_buffer_pool(adapter, + &adapter->rx_buff_pool[i]); + + for (i = 0; i < netdev->real_num_tx_queues; i++) + ibmveth_free_tx_ltb(adapter, i); + + netdev_dbg(netdev, "close complete\n"); + + return 0; +} + +static int ibmveth_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + + return ethtool_virtdev_set_link_ksettings(dev, cmd, + &adapter->speed, + &adapter->duplex); +} + +static int ibmveth_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + + cmd->base.speed = adapter->speed; + cmd->base.duplex = adapter->duplex; + cmd->base.port = PORT_OTHER; + + return 0; +} + +static void ibmveth_init_link_settings(struct net_device *dev) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + + adapter->speed = SPEED_1000; + adapter->duplex = DUPLEX_FULL; +} + +static void netdev_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strscpy(info->driver, ibmveth_driver_name, sizeof(info->driver)); + strscpy(info->version, ibmveth_driver_version, sizeof(info->version)); +} + +static netdev_features_t ibmveth_fix_features(struct net_device *dev, + netdev_features_t 
features) +{ + /* + * Since the ibmveth firmware interface does not have the + * concept of separate tx/rx checksum offload enable, if rx + * checksum is disabled we also have to disable tx checksum + * offload. Once we disable rx checksum offload, we are no + * longer allowed to send tx buffers that are not properly + * checksummed. + */ + + if (!(features & NETIF_F_RXCSUM)) + features &= ~NETIF_F_CSUM_MASK; + + return features; +} + +static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + unsigned long set_attr, clr_attr, ret_attr; + unsigned long set_attr6, clr_attr6; + long ret, ret4, ret6; + int rc1 = 0, rc2 = 0; + int restart = 0; + + if (netif_running(dev)) { + restart = 1; + adapter->pool_config = 1; + ibmveth_close(dev); + adapter->pool_config = 0; + } + + set_attr = 0; + clr_attr = 0; + set_attr6 = 0; + clr_attr6 = 0; + + if (data) { + set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM; + set_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM; + } else { + clr_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM; + clr_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM; + } + + ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr); + + if (ret == H_SUCCESS && + (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) { + ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr, + set_attr, &ret_attr); + + if (ret4 != H_SUCCESS) { + netdev_err(dev, "unable to change IPv4 checksum " + "offload settings. %d rc=%ld\n", + data, ret4); + + h_illan_attributes(adapter->vdev->unit_address, + set_attr, clr_attr, &ret_attr); + + if (data == 1) + dev->features &= ~NETIF_F_IP_CSUM; + + } else { + adapter->fw_ipv4_csum_support = data; + } + + ret6 = h_illan_attributes(adapter->vdev->unit_address, + clr_attr6, set_attr6, &ret_attr); + + if (ret6 != H_SUCCESS) { + netdev_err(dev, "unable to change IPv6 checksum " + "offload settings. 
%d rc=%ld\n", + data, ret6); + + h_illan_attributes(adapter->vdev->unit_address, + set_attr6, clr_attr6, &ret_attr); + + if (data == 1) + dev->features &= ~NETIF_F_IPV6_CSUM; + + } else + adapter->fw_ipv6_csum_support = data; + + if (ret4 == H_SUCCESS || ret6 == H_SUCCESS) + adapter->rx_csum = data; + else + rc1 = -EIO; + } else { + rc1 = -EIO; + netdev_err(dev, "unable to change checksum offload settings." + " %d rc=%ld ret_attr=%lx\n", data, ret, + ret_attr); + } + + if (restart) + rc2 = ibmveth_open(dev); + + return rc1 ? rc1 : rc2; +} + +static int ibmveth_set_tso(struct net_device *dev, u32 data) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + unsigned long set_attr, clr_attr, ret_attr; + long ret1, ret2; + int rc1 = 0, rc2 = 0; + int restart = 0; + + if (netif_running(dev)) { + restart = 1; + adapter->pool_config = 1; + ibmveth_close(dev); + adapter->pool_config = 0; + } + + set_attr = 0; + clr_attr = 0; + + if (data) + set_attr = IBMVETH_ILLAN_LRG_SR_ENABLED; + else + clr_attr = IBMVETH_ILLAN_LRG_SR_ENABLED; + + ret1 = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr); + + if (ret1 == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) && + !old_large_send) { + ret2 = h_illan_attributes(adapter->vdev->unit_address, clr_attr, + set_attr, &ret_attr); + + if (ret2 != H_SUCCESS) { + netdev_err(dev, "unable to change tso settings. 
%d rc=%ld\n", + data, ret2); + + h_illan_attributes(adapter->vdev->unit_address, + set_attr, clr_attr, &ret_attr); + + if (data == 1) + dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); + rc1 = -EIO; + + } else { + adapter->fw_large_send_support = data; + adapter->large_send = data; + } + } else { + /* Older firmware version of large send offload does not + * support tcp6/ipv6 + */ + if (data == 1) { + dev->features &= ~NETIF_F_TSO6; + netdev_info(dev, "TSO feature requires all partitions to have updated driver"); + } + adapter->large_send = data; + } + + if (restart) + rc2 = ibmveth_open(dev); + + return rc1 ? rc1 : rc2; +} + +static int ibmveth_set_features(struct net_device *dev, + netdev_features_t features) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + int rx_csum = !!(features & NETIF_F_RXCSUM); + int large_send = !!(features & (NETIF_F_TSO | NETIF_F_TSO6)); + int rc1 = 0, rc2 = 0; + + if (rx_csum != adapter->rx_csum) { + rc1 = ibmveth_set_csum_offload(dev, rx_csum); + if (rc1 && !adapter->rx_csum) + dev->features = + features & ~(NETIF_F_CSUM_MASK | + NETIF_F_RXCSUM); + } + + if (large_send != adapter->large_send) { + rc2 = ibmveth_set_tso(dev, large_send); + if (rc2 && !adapter->large_send) + dev->features = + features & ~(NETIF_F_TSO | NETIF_F_TSO6); + } + + return rc1 ? 
rc1 : rc2; +} + +static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++, data += ETH_GSTRING_LEN) + memcpy(data, ibmveth_stats[i].name, ETH_GSTRING_LEN); +} + +static int ibmveth_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ibmveth_stats); + default: + return -EOPNOTSUPP; + } +} + +static void ibmveth_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + int i; + struct ibmveth_adapter *adapter = netdev_priv(dev); + + for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++) + data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset); +} + +static void ibmveth_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + channels->max_tx = ibmveth_real_max_tx_queues(); + channels->tx_count = netdev->real_num_tx_queues; + + channels->max_rx = netdev->real_num_rx_queues; + channels->rx_count = netdev->real_num_rx_queues; +} + +static int ibmveth_set_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ibmveth_adapter *adapter = netdev_priv(netdev); + unsigned int old = netdev->real_num_tx_queues, + goal = channels->tx_count; + int rc, i; + + /* If ndo_open has not been called yet then don't allocate, just set + * desired netdev_queue's and return + */ + if (!(netdev->flags & IFF_UP)) + return netif_set_real_num_tx_queues(netdev, goal); + + /* We have IBMVETH_MAX_QUEUES netdev_queue's allocated + * but we may need to alloc/free the ltb's. 
+ */ + netif_tx_stop_all_queues(netdev); + + /* Allocate any queue that we need */ + for (i = old; i < goal; i++) { + if (adapter->tx_ltb_ptr[i]) + continue; + + rc = ibmveth_allocate_tx_ltb(adapter, i); + if (!rc) + continue; + + /* if something goes wrong, free everything we just allocated */ + netdev_err(netdev, "Failed to allocate more tx queues, returning to %d queues\n", + old); + goal = old; + old = i; + break; + } + rc = netif_set_real_num_tx_queues(netdev, goal); + if (rc) { + netdev_err(netdev, "Failed to set real tx queues, returning to %d queues\n", + old); + goal = old; + old = i; + } + /* Free any that are no longer needed */ + for (i = old; i > goal; i--) { + if (adapter->tx_ltb_ptr[i - 1]) + ibmveth_free_tx_ltb(adapter, i - 1); + } + + netif_tx_wake_all_queues(netdev); + + return rc; +} + +static const struct ethtool_ops netdev_ethtool_ops = { + .get_drvinfo = netdev_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = ibmveth_get_strings, + .get_sset_count = ibmveth_get_sset_count, + .get_ethtool_stats = ibmveth_get_ethtool_stats, + .get_link_ksettings = ibmveth_get_link_ksettings, + .set_link_ksettings = ibmveth_set_link_ksettings, + .get_channels = ibmveth_get_channels, + .set_channels = ibmveth_set_channels +}; + +static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + return -EOPNOTSUPP; +} + +static int ibmveth_send(struct ibmveth_adapter *adapter, + unsigned long desc, unsigned long mss) +{ + unsigned long correlator; + unsigned int retry_count; + unsigned long ret; + + /* + * The retry count sets a maximum for the number of broadcast and + * multicast destinations within the system. 
+ */ + retry_count = 1024; + correlator = 0; + do { + ret = h_send_logical_lan(adapter->vdev->unit_address, desc, + correlator, &correlator, mss, + adapter->fw_large_send_support); + } while ((ret == H_BUSY) && (retry_count--)); + + if (ret != H_SUCCESS && ret != H_DROPPED) { + netdev_err(adapter->netdev, "tx: h_send_logical_lan failed " + "with rc=%ld\n", ret); + return 1; + } + + return 0; +} + +static int ibmveth_is_packet_unsupported(struct sk_buff *skb, + struct net_device *netdev) +{ + struct ethhdr *ether_header; + int ret = 0; + + ether_header = eth_hdr(skb); + + if (ether_addr_equal(ether_header->h_dest, netdev->dev_addr)) { + netdev_dbg(netdev, "veth doesn't support loopback packets, dropping packet.\n"); + netdev->stats.tx_dropped++; + ret = -EOPNOTSUPP; + } + + return ret; +} + +static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct ibmveth_adapter *adapter = netdev_priv(netdev); + unsigned int desc_flags, total_bytes; + union ibmveth_buf_desc desc; + int i, queue_num = skb_get_queue_mapping(skb); + unsigned long mss = 0; + + if (ibmveth_is_packet_unsupported(skb, netdev)) + goto out; + /* veth can't checksum offload UDP */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + ((skb->protocol == htons(ETH_P_IP) && + ip_hdr(skb)->protocol != IPPROTO_TCP) || + (skb->protocol == htons(ETH_P_IPV6) && + ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) && + skb_checksum_help(skb)) { + + netdev_err(netdev, "tx: failed to checksum packet\n"); + netdev->stats.tx_dropped++; + goto out; + } + + desc_flags = IBMVETH_BUF_VALID; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + unsigned char *buf = skb_transport_header(skb) + + skb->csum_offset; + + desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD); + + /* Need to zero out the checksum */ + buf[0] = 0; + buf[1] = 0; + + if (skb_is_gso(skb) && adapter->fw_large_send_support) + desc_flags |= IBMVETH_BUF_LRG_SND; + } + + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) { + 
if (adapter->fw_large_send_support) { + mss = (unsigned long)skb_shinfo(skb)->gso_size; + adapter->tx_large_packets++; + } else if (!skb_is_gso_v6(skb)) { + /* Put -1 in the IP checksum to tell phyp it + * is a largesend packet. Put the mss in + * the TCP checksum. + */ + ip_hdr(skb)->check = 0xffff; + tcp_hdr(skb)->check = + cpu_to_be16(skb_shinfo(skb)->gso_size); + adapter->tx_large_packets++; + } + } + + /* Copy header into mapped buffer */ + if (unlikely(skb->len > adapter->tx_ltb_size)) { + netdev_err(adapter->netdev, "tx: packet size (%u) exceeds ltb (%u)\n", + skb->len, adapter->tx_ltb_size); + netdev->stats.tx_dropped++; + goto out; + } + memcpy(adapter->tx_ltb_ptr[queue_num], skb->data, skb_headlen(skb)); + total_bytes = skb_headlen(skb); + /* Copy frags into mapped buffers */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + memcpy(adapter->tx_ltb_ptr[queue_num] + total_bytes, + skb_frag_address_safe(frag), skb_frag_size(frag)); + total_bytes += skb_frag_size(frag); + } + + if (unlikely(total_bytes != skb->len)) { + netdev_err(adapter->netdev, "tx: incorrect packet len copied into ltb (%u != %u)\n", + skb->len, total_bytes); + netdev->stats.tx_dropped++; + goto out; + } + desc.fields.flags_len = desc_flags | skb->len; + desc.fields.address = adapter->tx_ltb_dma[queue_num]; + /* finish writing to long_term_buff before VIOS accessing it */ + dma_wmb(); + + if (ibmveth_send(adapter, desc.desc, mss)) { + adapter->tx_send_failed++; + netdev->stats.tx_dropped++; + } else { + netdev->stats.tx_packets++; + netdev->stats.tx_bytes += skb->len; + } + +out: + dev_consume_skb_any(skb); + return NETDEV_TX_OK; + + +} + +static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt) +{ + struct tcphdr *tcph; + int offset = 0; + int hdr_len; + + /* only TCP packets will be aggregated */ + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + if (iph->protocol 
== IPPROTO_TCP) { + offset = iph->ihl * 4; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + } else { + return; + } + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data; + + if (iph6->nexthdr == IPPROTO_TCP) { + offset = sizeof(struct ipv6hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + } else { + return; + } + } else { + return; + } + /* if mss is not set through Large Packet bit/mss in rx buffer, + * expect that the mss will be written to the tcp header checksum. + */ + tcph = (struct tcphdr *)(skb->data + offset); + if (lrg_pkt) { + skb_shinfo(skb)->gso_size = mss; + } else if (offset) { + skb_shinfo(skb)->gso_size = ntohs(tcph->check); + tcph->check = 0; + } + + if (skb_shinfo(skb)->gso_size) { + hdr_len = offset + tcph->doff * 4; + skb_shinfo(skb)->gso_segs = + DIV_ROUND_UP(skb->len - hdr_len, + skb_shinfo(skb)->gso_size); + } +} + +static void ibmveth_rx_csum_helper(struct sk_buff *skb, + struct ibmveth_adapter *adapter) +{ + struct iphdr *iph = NULL; + struct ipv6hdr *iph6 = NULL; + __be16 skb_proto = 0; + u16 iphlen = 0; + u16 iph_proto = 0; + u16 tcphdrlen = 0; + + skb_proto = be16_to_cpu(skb->protocol); + + if (skb_proto == ETH_P_IP) { + iph = (struct iphdr *)skb->data; + + /* If the IP checksum is not offloaded and if the packet + * is large send, the checksum must be rebuilt. + */ + if (iph->check == 0xffff) { + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, + iph->ihl); + } + + iphlen = iph->ihl * 4; + iph_proto = iph->protocol; + } else if (skb_proto == ETH_P_IPV6) { + iph6 = (struct ipv6hdr *)skb->data; + iphlen = sizeof(struct ipv6hdr); + iph_proto = iph6->nexthdr; + } + + /* When CSO is enabled the TCP checksum may have be set to NULL by + * the sender given that we zeroed out TCP checksum field in + * transmit path (refer ibmveth_start_xmit routine). In this case set + * up CHECKSUM_PARTIAL. 
If the packet is forwarded, the checksum will + * then be recalculated by the destination NIC (CSO must be enabled + * on the destination NIC). + * + * In an OVS environment, when a flow is not cached, specifically for a + * new TCP connection, the first packet information is passed up to + * the user space for finding a flow. During this process, OVS computes + * checksum on the first packet when CHECKSUM_PARTIAL flag is set. + * + * So, re-compute TCP pseudo header checksum. + */ + + if (iph_proto == IPPROTO_TCP) { + struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen); + + if (tcph->check == 0x0000) { + /* Recompute TCP pseudo header checksum */ + tcphdrlen = skb->len - iphlen; + if (skb_proto == ETH_P_IP) + tcph->check = + ~csum_tcpudp_magic(iph->saddr, + iph->daddr, tcphdrlen, iph_proto, 0); + else if (skb_proto == ETH_P_IPV6) + tcph->check = + ~csum_ipv6_magic(&iph6->saddr, + &iph6->daddr, tcphdrlen, iph_proto, 0); + /* Setup SKB fields for checksum offload */ + skb_partial_csum_set(skb, iphlen, + offsetof(struct tcphdr, check)); + skb_reset_network_header(skb); + } + } +} + +static int ibmveth_poll(struct napi_struct *napi, int budget) +{ + struct ibmveth_adapter *adapter = + container_of(napi, struct ibmveth_adapter, napi); + struct net_device *netdev = adapter->netdev; + int frames_processed = 0; + unsigned long lpar_rc; + u16 mss = 0; + + while (frames_processed < budget) { + if (!ibmveth_rxq_pending_buffer(adapter)) + break; + + smp_rmb(); + if (!ibmveth_rxq_buffer_valid(adapter)) { + wmb(); /* suggested by larson1 */ + adapter->rx_invalid_buffer++; + netdev_dbg(netdev, "recycling invalid buffer\n"); + ibmveth_rxq_recycle_buffer(adapter); + } else { + struct sk_buff *skb, *new_skb; + int length = ibmveth_rxq_frame_length(adapter); + int offset = ibmveth_rxq_frame_offset(adapter); + int csum_good = ibmveth_rxq_csum_good(adapter); + int lrg_pkt = ibmveth_rxq_large_packet(adapter); + __sum16 iph_check = 0; + + skb = ibmveth_rxq_get_buffer(adapter); + 
+ /* if the large packet bit is set in the rx queue + * descriptor, the mss will be written by PHYP eight + * bytes from the start of the rx buffer, which is + * skb->data at this stage + */ + if (lrg_pkt) { + __be64 *rxmss = (__be64 *)(skb->data + 8); + + mss = (u16)be64_to_cpu(*rxmss); + } + + new_skb = NULL; + if (length < rx_copybreak) + new_skb = netdev_alloc_skb(netdev, length); + + if (new_skb) { + skb_copy_to_linear_data(new_skb, + skb->data + offset, + length); + if (rx_flush) + ibmveth_flush_buffer(skb->data, + length + offset); + if (!ibmveth_rxq_recycle_buffer(adapter)) + kfree_skb(skb); + skb = new_skb; + } else { + ibmveth_rxq_harvest_buffer(adapter); + skb_reserve(skb, offset); + } + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, netdev); + + /* PHYP without PLSO support places a -1 in the ip + * checksum for large send frames. + */ + if (skb->protocol == cpu_to_be16(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + iph_check = iph->check; + } + + if ((length > netdev->mtu + ETH_HLEN) || + lrg_pkt || iph_check == 0xffff) { + ibmveth_rx_mss_helper(skb, mss, lrg_pkt); + adapter->rx_large_packets++; + } + + if (csum_good) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + ibmveth_rx_csum_helper(skb, adapter); + } + + napi_gro_receive(napi, skb); /* send it up */ + + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += length; + frames_processed++; + } + } + + ibmveth_replenish_task(adapter); + + if (frames_processed < budget) { + napi_complete_done(napi, frames_processed); + + /* We think we are done - reenable interrupts, + * then check once more to make sure we are done. 
+ */ + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_ENABLE); + + BUG_ON(lpar_rc != H_SUCCESS); + + if (ibmveth_rxq_pending_buffer(adapter) && + napi_reschedule(napi)) { + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_DISABLE); + } + } + + return frames_processed; +} + +static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance) +{ + struct net_device *netdev = dev_instance; + struct ibmveth_adapter *adapter = netdev_priv(netdev); + unsigned long lpar_rc; + + if (napi_schedule_prep(&adapter->napi)) { + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_DISABLE); + BUG_ON(lpar_rc != H_SUCCESS); + __napi_schedule(&adapter->napi); + } + return IRQ_HANDLED; +} + +static void ibmveth_set_multicast_list(struct net_device *netdev) +{ + struct ibmveth_adapter *adapter = netdev_priv(netdev); + unsigned long lpar_rc; + + if ((netdev->flags & IFF_PROMISC) || + (netdev_mc_count(netdev) > adapter->mcastFilterSize)) { + lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, + IbmVethMcastEnableRecv | + IbmVethMcastDisableFiltering, + 0); + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_multicast_ctrl rc=%ld when " + "entering promisc mode\n", lpar_rc); + } + } else { + struct netdev_hw_addr *ha; + /* clear the filter table & disable filtering */ + lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, + IbmVethMcastEnableRecv | + IbmVethMcastDisableFiltering | + IbmVethMcastClearFilterTable, + 0); + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_multicast_ctrl rc=%ld when " + "attempting to clear filter table\n", + lpar_rc); + } + /* add the addresses to the filter table */ + netdev_for_each_mc_addr(ha, netdev) { + /* add the multicast address to the filter table */ + u64 mcast_addr; + mcast_addr = ether_addr_to_u64(ha->addr); + lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, + IbmVethMcastAddFilter, + mcast_addr); + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_multicast_ctrl rc=%ld " + "when adding an 
entry to the filter " + "table\n", lpar_rc); + } + } + + /* re-enable filtering */ + lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, + IbmVethMcastEnableFiltering, + 0); + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_multicast_ctrl rc=%ld when " + "enabling filtering\n", lpar_rc); + } + } +} + +static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + struct vio_dev *viodev = adapter->vdev; + int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH; + int i, rc; + int need_restart = 0; + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) + break; + + if (i == IBMVETH_NUM_BUFF_POOLS) + return -EINVAL; + + /* Deactivate all the buffer pools so that the next loop can activate + only the buffer pools necessary to hold the new MTU */ + if (netif_running(adapter->netdev)) { + need_restart = 1; + adapter->pool_config = 1; + ibmveth_close(adapter->netdev); + adapter->pool_config = 0; + } + + /* Look for an active buffer pool that can hold the new MTU */ + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { + adapter->rx_buff_pool[i].active = 1; + + if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) { + dev->mtu = new_mtu; + vio_cmo_set_dev_desired(viodev, + ibmveth_get_desired_dma + (viodev)); + if (need_restart) { + return ibmveth_open(adapter->netdev); + } + return 0; + } + } + + if (need_restart && (rc = ibmveth_open(adapter->netdev))) + return rc; + + return -EINVAL; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void ibmveth_poll_controller(struct net_device *dev) +{ + ibmveth_replenish_task(netdev_priv(dev)); + ibmveth_interrupt(dev->irq, dev); +} +#endif + +/** + * ibmveth_get_desired_dma - Calculate IO memory desired by the driver + * + * @vdev: struct vio_dev for the device whose desired IO mem is to be returned + * + * Return value: + * Number of bytes of IO data the driver will need to perform well. 
+ */ +static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) +{ + struct net_device *netdev = dev_get_drvdata(&vdev->dev); + struct ibmveth_adapter *adapter; + struct iommu_table *tbl; + unsigned long ret; + int i; + int rxqentries = 1; + + tbl = get_iommu_table_base(&vdev->dev); + + /* netdev inits at probe time along with the structures we need below*/ + if (netdev == NULL) + return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl); + + adapter = netdev_priv(netdev); + + ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE; + ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl); + /* add size of mapped tx buffers */ + ret += IOMMU_PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE, tbl); + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { + /* add the size of the active receive buffers */ + if (adapter->rx_buff_pool[i].active) + ret += + adapter->rx_buff_pool[i].size * + IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i]. + buff_size, tbl); + rxqentries += adapter->rx_buff_pool[i].size; + } + /* add the size of the receive queue entries */ + ret += IOMMU_PAGE_ALIGN( + rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl); + + return ret; +} + +static int ibmveth_set_mac_addr(struct net_device *dev, void *p) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + struct sockaddr *addr = p; + u64 mac_address; + int rc; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + mac_address = ether_addr_to_u64(addr->sa_data); + rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address); + if (rc) { + netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc); + return rc; + } + + eth_hw_addr_set(dev, addr->sa_data); + + return 0; +} + +static const struct net_device_ops ibmveth_netdev_ops = { + .ndo_open = ibmveth_open, + .ndo_stop = ibmveth_close, + .ndo_start_xmit = ibmveth_start_xmit, + .ndo_set_rx_mode = ibmveth_set_multicast_list, + .ndo_eth_ioctl = ibmveth_ioctl, + .ndo_change_mtu = ibmveth_change_mtu, + .ndo_fix_features = 
ibmveth_fix_features, + .ndo_set_features = ibmveth_set_features, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = ibmveth_set_mac_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = ibmveth_poll_controller, +#endif +}; + +static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) +{ + int rc, i, mac_len; + struct net_device *netdev; + struct ibmveth_adapter *adapter; + unsigned char *mac_addr_p; + __be32 *mcastFilterSize_p; + long ret; + unsigned long ret_attr; + + dev_dbg(&dev->dev, "entering ibmveth_probe for UA 0x%x\n", + dev->unit_address); + + mac_addr_p = (unsigned char *)vio_get_attribute(dev, VETH_MAC_ADDR, + &mac_len); + if (!mac_addr_p) { + dev_err(&dev->dev, "Can't find VETH_MAC_ADDR attribute\n"); + return -EINVAL; + } + /* Workaround for old/broken pHyp */ + if (mac_len == 8) + mac_addr_p += 2; + else if (mac_len != 6) { + dev_err(&dev->dev, "VETH_MAC_ADDR attribute wrong len %d\n", + mac_len); + return -EINVAL; + } + + mcastFilterSize_p = (__be32 *)vio_get_attribute(dev, + VETH_MCAST_FILTER_SIZE, + NULL); + if (!mcastFilterSize_p) { + dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE " + "attribute\n"); + return -EINVAL; + } + + netdev = alloc_etherdev_mqs(sizeof(struct ibmveth_adapter), IBMVETH_MAX_QUEUES, 1); + if (!netdev) + return -ENOMEM; + + adapter = netdev_priv(netdev); + dev_set_drvdata(&dev->dev, netdev); + + adapter->vdev = dev; + adapter->netdev = netdev; + adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p); + adapter->pool_config = 0; + ibmveth_init_link_settings(netdev); + + netif_napi_add_weight(netdev, &adapter->napi, ibmveth_poll, 16); + + netdev->irq = dev->irq; + netdev->netdev_ops = &ibmveth_netdev_ops; + netdev->ethtool_ops = &netdev_ethtool_ops; + SET_NETDEV_DEV(netdev, &dev->dev); + netdev->hw_features = NETIF_F_SG; + if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) { + netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM; + } 
+ + netdev->features |= netdev->hw_features; + + ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr); + + /* If running older firmware, TSO should not be enabled by default */ + if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) && + !old_large_send) { + netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + netdev->features |= netdev->hw_features; + } else { + netdev->hw_features |= NETIF_F_TSO; + } + + adapter->is_active_trunk = false; + if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK)) { + adapter->is_active_trunk = true; + netdev->hw_features |= NETIF_F_FRAGLIST; + netdev->features |= NETIF_F_FRAGLIST; + } + + netdev->min_mtu = IBMVETH_MIN_MTU; + netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH; + + eth_hw_addr_set(netdev, mac_addr_p); + + if (firmware_has_feature(FW_FEATURE_CMO)) + memcpy(pool_count, pool_count_cmo, sizeof(pool_count)); + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { + struct kobject *kobj = &adapter->rx_buff_pool[i].kobj; + int error; + + ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i, + pool_count[i], pool_size[i], + pool_active[i]); + error = kobject_init_and_add(kobj, &ktype_veth_pool, + &dev->dev.kobj, "pool%d", i); + if (!error) + kobject_uevent(kobj, KOBJ_ADD); + } + + rc = netif_set_real_num_tx_queues(netdev, min(num_online_cpus(), + IBMVETH_DEFAULT_QUEUES)); + if (rc) { + netdev_dbg(netdev, "failed to set number of tx queues rc=%d\n", + rc); + free_netdev(netdev); + return rc; + } + adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE); + for (i = 0; i < IBMVETH_MAX_QUEUES; i++) + adapter->tx_ltb_ptr[i] = NULL; + + netdev_dbg(netdev, "adapter @ 0x%p\n", adapter); + netdev_dbg(netdev, "registering netdev...\n"); + + ibmveth_set_features(netdev, netdev->features); + + rc = register_netdev(netdev); + + if (rc) { + netdev_dbg(netdev, "failed to register netdev rc=%d\n", rc); + free_netdev(netdev); + return rc; + } + + netdev_dbg(netdev, "registered\n"); + + return 0; +} + 
+static void ibmveth_remove(struct vio_dev *dev) +{ + struct net_device *netdev = dev_get_drvdata(&dev->dev); + struct ibmveth_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + kobject_put(&adapter->rx_buff_pool[i].kobj); + + unregister_netdev(netdev); + + free_netdev(netdev); + dev_set_drvdata(&dev->dev, NULL); +} + +static struct attribute veth_active_attr; +static struct attribute veth_num_attr; +static struct attribute veth_size_attr; + +static ssize_t veth_pool_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct ibmveth_buff_pool *pool = container_of(kobj, + struct ibmveth_buff_pool, + kobj); + + if (attr == &veth_active_attr) + return sprintf(buf, "%d\n", pool->active); + else if (attr == &veth_num_attr) + return sprintf(buf, "%d\n", pool->size); + else if (attr == &veth_size_attr) + return sprintf(buf, "%d\n", pool->buff_size); + return 0; +} + +static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct ibmveth_buff_pool *pool = container_of(kobj, + struct ibmveth_buff_pool, + kobj); + struct net_device *netdev = dev_get_drvdata(kobj_to_dev(kobj->parent)); + struct ibmveth_adapter *adapter = netdev_priv(netdev); + long value = simple_strtol(buf, NULL, 10); + long rc; + + if (attr == &veth_active_attr) { + if (value && !pool->active) { + if (netif_running(netdev)) { + if (ibmveth_alloc_buffer_pool(pool)) { + netdev_err(netdev, + "unable to alloc pool\n"); + return -ENOMEM; + } + pool->active = 1; + adapter->pool_config = 1; + ibmveth_close(netdev); + adapter->pool_config = 0; + if ((rc = ibmveth_open(netdev))) + return rc; + } else { + pool->active = 1; + } + } else if (!value && pool->active) { + int mtu = netdev->mtu + IBMVETH_BUFF_OH; + int i; + /* Make sure there is a buffer pool with buffers that + can hold a packet of the size of the MTU */ + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { + if (pool == 
&adapter->rx_buff_pool[i]) + continue; + if (!adapter->rx_buff_pool[i].active) + continue; + if (mtu <= adapter->rx_buff_pool[i].buff_size) + break; + } + + if (i == IBMVETH_NUM_BUFF_POOLS) { + netdev_err(netdev, "no active pool >= MTU\n"); + return -EPERM; + } + + if (netif_running(netdev)) { + adapter->pool_config = 1; + ibmveth_close(netdev); + pool->active = 0; + adapter->pool_config = 0; + if ((rc = ibmveth_open(netdev))) + return rc; + } + pool->active = 0; + } + } else if (attr == &veth_num_attr) { + if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) { + return -EINVAL; + } else { + if (netif_running(netdev)) { + adapter->pool_config = 1; + ibmveth_close(netdev); + adapter->pool_config = 0; + pool->size = value; + if ((rc = ibmveth_open(netdev))) + return rc; + } else { + pool->size = value; + } + } + } else if (attr == &veth_size_attr) { + if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) { + return -EINVAL; + } else { + if (netif_running(netdev)) { + adapter->pool_config = 1; + ibmveth_close(netdev); + adapter->pool_config = 0; + pool->buff_size = value; + if ((rc = ibmveth_open(netdev))) + return rc; + } else { + pool->buff_size = value; + } + } + } + + /* kick the interrupt handler to allocate/deallocate pools */ + ibmveth_interrupt(netdev->irq, netdev); + return count; +} + + +#define ATTR(_name, _mode) \ + struct attribute veth_##_name##_attr = { \ + .name = __stringify(_name), .mode = _mode, \ + }; + +static ATTR(active, 0644); +static ATTR(num, 0644); +static ATTR(size, 0644); + +static struct attribute *veth_pool_attrs[] = { + &veth_active_attr, + &veth_num_attr, + &veth_size_attr, + NULL, +}; +ATTRIBUTE_GROUPS(veth_pool); + +static const struct sysfs_ops veth_pool_ops = { + .show = veth_pool_show, + .store = veth_pool_store, +}; + +static struct kobj_type ktype_veth_pool = { + .release = NULL, + .sysfs_ops = &veth_pool_ops, + .default_groups = veth_pool_groups, +}; + +static int ibmveth_resume(struct device *dev) +{ + struct net_device 
*netdev = dev_get_drvdata(dev); + ibmveth_interrupt(netdev->irq, netdev); + return 0; +} + +static const struct vio_device_id ibmveth_device_table[] = { + { "network", "IBM,l-lan"}, + { "", "" } +}; +MODULE_DEVICE_TABLE(vio, ibmveth_device_table); + +static const struct dev_pm_ops ibmveth_pm_ops = { + .resume = ibmveth_resume +}; + +static struct vio_driver ibmveth_driver = { + .id_table = ibmveth_device_table, + .probe = ibmveth_probe, + .remove = ibmveth_remove, + .get_desired_dma = ibmveth_get_desired_dma, + .name = ibmveth_driver_name, + .pm = &ibmveth_pm_ops, +}; + +static int __init ibmveth_module_init(void) +{ + printk(KERN_DEBUG "%s: %s %s\n", ibmveth_driver_name, + ibmveth_driver_string, ibmveth_driver_version); + + return vio_register_driver(&ibmveth_driver); +} + +static void __exit ibmveth_module_exit(void) +{ + vio_unregister_driver(&ibmveth_driver); +} + +module_init(ibmveth_module_init); +module_exit(ibmveth_module_exit); diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h new file mode 100644 index 000000000..115d4c45a --- /dev/null +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * IBM Power Virtual Ethernet Device Driver + * + * Copyright (C) IBM Corporation, 2003, 2010 + * + * Authors: Dave Larson <larson1@us.ibm.com> + * Santiago Leon <santil@linux.vnet.ibm.com> + * Brian King <brking@linux.vnet.ibm.com> + * Robert Jennings <rcj@linux.vnet.ibm.com> + * Anton Blanchard <anton@au.ibm.com> + */ + +#ifndef _IBMVETH_H +#define _IBMVETH_H + +/* constants for H_MULTICAST_CTRL */ +#define IbmVethMcastReceptionModifyBit 0x80000UL +#define IbmVethMcastReceptionEnableBit 0x20000UL +#define IbmVethMcastFilterModifyBit 0x40000UL +#define IbmVethMcastFilterEnableBit 0x10000UL + +#define IbmVethMcastEnableRecv (IbmVethMcastReceptionModifyBit | IbmVethMcastReceptionEnableBit) +#define IbmVethMcastDisableRecv (IbmVethMcastReceptionModifyBit) +#define 
IbmVethMcastEnableFiltering (IbmVethMcastFilterModifyBit | IbmVethMcastFilterEnableBit) +#define IbmVethMcastDisableFiltering (IbmVethMcastFilterModifyBit) +#define IbmVethMcastAddFilter 0x1UL +#define IbmVethMcastRemoveFilter 0x2UL +#define IbmVethMcastClearFilterTable 0x3UL + +#define IBMVETH_ILLAN_LRG_SR_ENABLED 0x0000000000010000UL +#define IBMVETH_ILLAN_LRG_SND_SUPPORT 0x0000000000008000UL +#define IBMVETH_ILLAN_PADDED_PKT_CSUM 0x0000000000002000UL +#define IBMVETH_ILLAN_TRUNK_PRI_MASK 0x0000000000000F00UL +#define IBMVETH_ILLAN_IPV6_TCP_CSUM 0x0000000000000004UL +#define IBMVETH_ILLAN_IPV4_TCP_CSUM 0x0000000000000002UL +#define IBMVETH_ILLAN_ACTIVE_TRUNK 0x0000000000000001UL + +/* hcall macros */ +#define h_register_logical_lan(ua, buflst, rxq, fltlst, mac) \ + plpar_hcall_norets(H_REGISTER_LOGICAL_LAN, ua, buflst, rxq, fltlst, mac) + +#define h_free_logical_lan(ua) \ + plpar_hcall_norets(H_FREE_LOGICAL_LAN, ua) + +#define h_add_logical_lan_buffer(ua, buf) \ + plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf) + +/* FW allows us to send 6 descriptors but we only use one so mark + * the other 5 as unused (0) + */ +static inline long h_send_logical_lan(unsigned long unit_address, + unsigned long desc, unsigned long corellator_in, + unsigned long *corellator_out, unsigned long mss, + unsigned long large_send_support) +{ + long rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + if (large_send_support) + rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address, + desc, 0, 0, 0, 0, 0, corellator_in, mss); + else + rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address, + desc, 0, 0, 0, 0, 0, corellator_in); + + *corellator_out = retbuf[0]; + + return rc; +} + +static inline long h_illan_attributes(unsigned long unit_address, + unsigned long reset_mask, unsigned long set_mask, + unsigned long *ret_attributes) +{ + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_ILLAN_ATTRIBUTES, retbuf, unit_address, + reset_mask, set_mask); 
+ + *ret_attributes = retbuf[0]; + + return rc; +} + +#define h_multicast_ctrl(ua, cmd, mac) \ + plpar_hcall_norets(H_MULTICAST_CTRL, ua, cmd, mac) + +#define h_change_logical_lan_mac(ua, mac) \ + plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac) + +#define IBMVETH_NUM_BUFF_POOLS 5 +#define IBMVETH_IO_ENTITLEMENT_DEFAULT 4243456 /* MTU of 1500 needs 4.2Mb */ +#define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */ +#define IBMVETH_MIN_MTU 68 +#define IBMVETH_MAX_POOL_COUNT 4096 +#define IBMVETH_BUFF_LIST_SIZE 4096 +#define IBMVETH_FILT_LIST_SIZE 4096 +#define IBMVETH_MAX_BUF_SIZE (1024 * 128) +#define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64) +#define IBMVETH_MAX_QUEUES 16U +#define IBMVETH_DEFAULT_QUEUES 8U + +static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; +static int pool_count[] = { 256, 512, 256, 256, 256 }; +static int pool_count_cmo[] = { 256, 512, 256, 256, 64 }; +static int pool_active[] = { 1, 1, 0, 0, 1}; + +#define IBM_VETH_INVALID_MAP ((u16)0xffff) + +struct ibmveth_buff_pool { + u32 size; + u32 index; + u32 buff_size; + u32 threshold; + atomic_t available; + u32 consumer_index; + u32 producer_index; + u16 *free_map; + dma_addr_t *dma_addr; + struct sk_buff **skbuff; + int active; + struct kobject kobj; +}; + +struct ibmveth_rx_q { + u64 index; + u64 num_slots; + u64 toggle; + dma_addr_t queue_dma; + u32 queue_len; + struct ibmveth_rx_q_entry *queue_addr; +}; + +struct ibmveth_adapter { + struct vio_dev *vdev; + struct net_device *netdev; + struct napi_struct napi; + unsigned int mcastFilterSize; + void * buffer_list_addr; + void * filter_list_addr; + void *tx_ltb_ptr[IBMVETH_MAX_QUEUES]; + unsigned int tx_ltb_size; + dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES]; + dma_addr_t buffer_list_dma; + dma_addr_t filter_list_dma; + struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS]; + struct ibmveth_rx_q rx_queue; + int pool_config; + int rx_csum; + int large_send; + bool is_active_trunk; + + u64 
fw_ipv6_csum_support; + u64 fw_ipv4_csum_support; + u64 fw_large_send_support; + /* adapter specific stats */ + u64 replenish_task_cycles; + u64 replenish_no_mem; + u64 replenish_add_buff_failure; + u64 replenish_add_buff_success; + u64 rx_invalid_buffer; + u64 rx_no_buffer; + u64 tx_map_failed; + u64 tx_send_failed; + u64 tx_large_packets; + u64 rx_large_packets; + /* Ethtool settings */ + u8 duplex; + u32 speed; +}; + +/* + * We pass struct ibmveth_buf_desc_fields to the hypervisor in registers, + * so we don't need to byteswap the two elements. However since we use + * a union (ibmveth_buf_desc) to convert from the struct to a u64 we + * do end up with endian specific ordering of the elements and that + * needs correcting. + */ +struct ibmveth_buf_desc_fields { +#ifdef __BIG_ENDIAN + u32 flags_len; + u32 address; +#else + u32 address; + u32 flags_len; +#endif +#define IBMVETH_BUF_VALID 0x80000000 +#define IBMVETH_BUF_TOGGLE 0x40000000 +#define IBMVETH_BUF_LRG_SND 0x04000000 +#define IBMVETH_BUF_NO_CSUM 0x02000000 +#define IBMVETH_BUF_CSUM_GOOD 0x01000000 +#define IBMVETH_BUF_LEN_MASK 0x00FFFFFF +}; + +union ibmveth_buf_desc { + u64 desc; + struct ibmveth_buf_desc_fields fields; +}; + +struct ibmveth_rx_q_entry { + __be32 flags_off; +#define IBMVETH_RXQ_TOGGLE 0x80000000 +#define IBMVETH_RXQ_TOGGLE_SHIFT 31 +#define IBMVETH_RXQ_VALID 0x40000000 +#define IBMVETH_RXQ_LRG_PKT 0x04000000 +#define IBMVETH_RXQ_NO_CSUM 0x02000000 +#define IBMVETH_RXQ_CSUM_GOOD 0x01000000 +#define IBMVETH_RXQ_OFF_MASK 0x0000FFFF + + __be32 length; + /* correlator is only used by the OS, no need to byte swap */ + u64 correlator; +}; + +#endif /* _IBMVETH_H */ diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c new file mode 100644 index 000000000..157be4e9b --- /dev/null +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -0,0 +1,6444 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/**************************************************************************/ +/* 
*/ +/* IBM System i and System p Virtual NIC Device Driver */ +/* Copyright (C) 2014 IBM Corp. */ +/* Santiago Leon (santi_leon@yahoo.com) */ +/* Thomas Falcon (tlfalcon@linux.vnet.ibm.com) */ +/* John Allen (jallen@linux.vnet.ibm.com) */ +/* */ +/* */ +/* This module contains the implementation of a virtual ethernet device */ +/* for use with IBM i/p Series LPAR Linux. It utilizes the logical LAN */ +/* option of the RS/6000 Platform Architecture to interface with virtual */ +/* ethernet NICs that are presented to the partition by the hypervisor. */ +/* */ +/* Messages are passed between the VNIC driver and the VNIC server using */ +/* Command/Response Queues (CRQs) and sub CRQs (sCRQs). CRQs are used to */ +/* issue and receive commands that initiate communication with the server */ +/* on driver initialization. Sub CRQs (sCRQs) are similar to CRQs, but */ +/* are used by the driver to notify the server that a packet is */ +/* ready for transmission or that a buffer has been added to receive a */ +/* packet. Subsequently, sCRQs are used by the server to notify the */ +/* driver that a packet transmission has been completed or that a packet */ +/* has been received and placed in a waiting buffer. */ +/* */ +/* In lieu of a more conventional "on-the-fly" DMA mapping strategy in */ +/* which skbs are DMA mapped and immediately unmapped when the transmit */ +/* or receive has been completed, the VNIC driver is required to use */ +/* "long term mapping". This entails that large, continuous DMA mapped */ +/* buffers are allocated on driver initialization and these buffers are */ +/* then continuously reused to pass skbs to and from the VNIC server. 
*/ +/* */ +/**************************************************************************/ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/completion.h> +#include <linux/ioport.h> +#include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/mm.h> +#include <linux/ethtool.h> +#include <linux/proc_fs.h> +#include <linux/if_arp.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/kthread.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <net/net_namespace.h> +#include <asm/hvcall.h> +#include <linux/atomic.h> +#include <asm/vio.h> +#include <asm/xive.h> +#include <asm/iommu.h> +#include <linux/uaccess.h> +#include <asm/firmware.h> +#include <linux/workqueue.h> +#include <linux/if_vlan.h> +#include <linux/utsname.h> + +#include "ibmvnic.h" + +static const char ibmvnic_driver_name[] = "ibmvnic"; +static const char ibmvnic_driver_string[] = "IBM System i/p Virtual NIC Driver"; + +MODULE_AUTHOR("Santiago Leon"); +MODULE_DESCRIPTION("IBM System i/p Virtual NIC Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(IBMVNIC_DRIVER_VERSION); + +static int ibmvnic_version = IBMVNIC_INITIAL_VERSION; +static void release_sub_crqs(struct ibmvnic_adapter *, bool); +static int ibmvnic_reset_crq(struct ibmvnic_adapter *); +static int ibmvnic_send_crq_init(struct ibmvnic_adapter *); +static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *); +static int ibmvnic_send_crq(struct ibmvnic_adapter *, union ibmvnic_crq *); +static int send_subcrq_indirect(struct ibmvnic_adapter *, u64, u64, u64); +static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance); +static int enable_scrq_irq(struct ibmvnic_adapter *, + struct ibmvnic_sub_crq_queue 
*); +static int disable_scrq_irq(struct ibmvnic_adapter *, + struct ibmvnic_sub_crq_queue *); +static int pending_scrq(struct ibmvnic_adapter *, + struct ibmvnic_sub_crq_queue *); +static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *, + struct ibmvnic_sub_crq_queue *); +static int ibmvnic_poll(struct napi_struct *napi, int data); +static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter); +static inline void reinit_init_done(struct ibmvnic_adapter *adapter); +static void send_query_map(struct ibmvnic_adapter *adapter); +static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8); +static int send_request_unmap(struct ibmvnic_adapter *, u8); +static int send_login(struct ibmvnic_adapter *adapter); +static void send_query_cap(struct ibmvnic_adapter *adapter); +static int init_sub_crqs(struct ibmvnic_adapter *); +static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); +static int ibmvnic_reset_init(struct ibmvnic_adapter *, bool reset); +static void release_crq_queue(struct ibmvnic_adapter *); +static int __ibmvnic_set_mac(struct net_device *, u8 *); +static int init_crq_queue(struct ibmvnic_adapter *adapter); +static int send_query_phys_parms(struct ibmvnic_adapter *adapter); +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq); +static void free_long_term_buff(struct ibmvnic_adapter *adapter, + struct ibmvnic_long_term_buff *ltb); +static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter); +static void flush_reset_queue(struct ibmvnic_adapter *adapter); + +struct ibmvnic_stat { + char name[ETH_GSTRING_LEN]; + int offset; +}; + +#define IBMVNIC_STAT_OFF(stat) (offsetof(struct ibmvnic_adapter, stats) + \ + offsetof(struct ibmvnic_statistics, stat)) +#define IBMVNIC_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + (off)))) + +static const struct ibmvnic_stat ibmvnic_stats[] = { + {"rx_packets", IBMVNIC_STAT_OFF(rx_packets)}, + {"rx_bytes", 
IBMVNIC_STAT_OFF(rx_bytes)}, + {"tx_packets", IBMVNIC_STAT_OFF(tx_packets)}, + {"tx_bytes", IBMVNIC_STAT_OFF(tx_bytes)}, + {"ucast_tx_packets", IBMVNIC_STAT_OFF(ucast_tx_packets)}, + {"ucast_rx_packets", IBMVNIC_STAT_OFF(ucast_rx_packets)}, + {"mcast_tx_packets", IBMVNIC_STAT_OFF(mcast_tx_packets)}, + {"mcast_rx_packets", IBMVNIC_STAT_OFF(mcast_rx_packets)}, + {"bcast_tx_packets", IBMVNIC_STAT_OFF(bcast_tx_packets)}, + {"bcast_rx_packets", IBMVNIC_STAT_OFF(bcast_rx_packets)}, + {"align_errors", IBMVNIC_STAT_OFF(align_errors)}, + {"fcs_errors", IBMVNIC_STAT_OFF(fcs_errors)}, + {"single_collision_frames", IBMVNIC_STAT_OFF(single_collision_frames)}, + {"multi_collision_frames", IBMVNIC_STAT_OFF(multi_collision_frames)}, + {"sqe_test_errors", IBMVNIC_STAT_OFF(sqe_test_errors)}, + {"deferred_tx", IBMVNIC_STAT_OFF(deferred_tx)}, + {"late_collisions", IBMVNIC_STAT_OFF(late_collisions)}, + {"excess_collisions", IBMVNIC_STAT_OFF(excess_collisions)}, + {"internal_mac_tx_errors", IBMVNIC_STAT_OFF(internal_mac_tx_errors)}, + {"carrier_sense", IBMVNIC_STAT_OFF(carrier_sense)}, + {"too_long_frames", IBMVNIC_STAT_OFF(too_long_frames)}, + {"internal_mac_rx_errors", IBMVNIC_STAT_OFF(internal_mac_rx_errors)}, +}; + +static int send_crq_init_complete(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.generic.first = IBMVNIC_CRQ_INIT_CMD; + crq.generic.cmd = IBMVNIC_CRQ_INIT_COMPLETE; + + return ibmvnic_send_crq(adapter, &crq); +} + +static int send_version_xchg(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.version_exchange.first = IBMVNIC_CRQ_CMD; + crq.version_exchange.cmd = VERSION_EXCHANGE; + crq.version_exchange.version = cpu_to_be16(ibmvnic_version); + + return ibmvnic_send_crq(adapter, &crq); +} + +static long h_reg_sub_crq(unsigned long unit_address, unsigned long token, + unsigned long length, unsigned long *number, + unsigned long *irq) +{ + unsigned long 
retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_REG_SUB_CRQ, retbuf, unit_address, token, length); + *number = retbuf[0]; + *irq = retbuf[1]; + + return rc; +} + +/** + * ibmvnic_wait_for_completion - Check device state and wait for completion + * @adapter: private device data + * @comp_done: completion structure to wait for + * @timeout: time to wait in milliseconds + * + * Wait for a completion signal or until the timeout limit is reached + * while checking that the device is still active. + */ +static int ibmvnic_wait_for_completion(struct ibmvnic_adapter *adapter, + struct completion *comp_done, + unsigned long timeout) +{ + struct net_device *netdev; + unsigned long div_timeout; + u8 retry; + + netdev = adapter->netdev; + retry = 5; + div_timeout = msecs_to_jiffies(timeout / retry); + while (true) { + if (!adapter->crq.active) { + netdev_err(netdev, "Device down!\n"); + return -ENODEV; + } + if (!retry--) + break; + if (wait_for_completion_timeout(comp_done, div_timeout)) + return 0; + } + netdev_err(netdev, "Operation timed out.\n"); + return -ETIMEDOUT; +} + +/** + * reuse_ltb() - Check if a long term buffer can be reused + * @ltb: The long term buffer to be checked + * @size: The size of the long term buffer. + * + * An LTB can be reused unless its size has changed. + * + * Return: Return true if the LTB can be reused, false otherwise. + */ +static bool reuse_ltb(struct ibmvnic_long_term_buff *ltb, int size) +{ + return (ltb->buff && ltb->size == size); +} + +/** + * alloc_long_term_buff() - Allocate a long term buffer (LTB) + * + * @adapter: ibmvnic adapter associated to the LTB + * @ltb: container object for the LTB + * @size: size of the LTB + * + * Allocate an LTB of the specified size and notify VIOS. + * + * If the given @ltb already has the correct size, reuse it. Otherwise if + * its non-NULL, free it. Then allocate a new one of the correct size. + * Notify the VIOS either way since we may now be working with a new VIOS. 
+ * + * Allocating larger chunks of memory during resets, specially LPM or under + * low memory situations can cause resets to fail/timeout and for LPAR to + * lose connectivity. So hold onto the LTB even if we fail to communicate + * with the VIOS and reuse it on next open. Free LTB when adapter is closed. + * + * Return: 0 if we were able to allocate the LTB and notify the VIOS and + * a negative value otherwise. + */ +static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, + struct ibmvnic_long_term_buff *ltb, int size) +{ + struct device *dev = &adapter->vdev->dev; + u64 prev = 0; + int rc; + + if (!reuse_ltb(ltb, size)) { + dev_dbg(dev, + "LTB size changed from 0x%llx to 0x%x, reallocating\n", + ltb->size, size); + prev = ltb->size; + free_long_term_buff(adapter, ltb); + } + + if (ltb->buff) { + dev_dbg(dev, "Reusing LTB [map %d, size 0x%llx]\n", + ltb->map_id, ltb->size); + } else { + ltb->buff = dma_alloc_coherent(dev, size, &ltb->addr, + GFP_KERNEL); + if (!ltb->buff) { + dev_err(dev, "Couldn't alloc long term buffer\n"); + return -ENOMEM; + } + ltb->size = size; + + ltb->map_id = find_first_zero_bit(adapter->map_ids, + MAX_MAP_ID); + bitmap_set(adapter->map_ids, ltb->map_id, 1); + + dev_dbg(dev, + "Allocated new LTB [map %d, size 0x%llx was 0x%llx]\n", + ltb->map_id, ltb->size, prev); + } + + /* Ensure ltb is zeroed - specially when reusing it. 
*/ + memset(ltb->buff, 0, ltb->size); + + mutex_lock(&adapter->fw_lock); + adapter->fw_done_rc = 0; + reinit_completion(&adapter->fw_done); + + rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); + if (rc) { + dev_err(dev, "send_request_map failed, rc = %d\n", rc); + goto out; + } + + rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); + if (rc) { + dev_err(dev, "LTB map request aborted or timed out, rc = %d\n", + rc); + goto out; + } + + if (adapter->fw_done_rc) { + dev_err(dev, "Couldn't map LTB, rc = %d\n", + adapter->fw_done_rc); + rc = -EIO; + goto out; + } + rc = 0; +out: + /* don't free LTB on communication error - see function header */ + mutex_unlock(&adapter->fw_lock); + return rc; +} + +static void free_long_term_buff(struct ibmvnic_adapter *adapter, + struct ibmvnic_long_term_buff *ltb) +{ + struct device *dev = &adapter->vdev->dev; + + if (!ltb->buff) + return; + + /* VIOS automatically unmaps the long term buffer at remote + * end for the following resets: + * FAILOVER, MOBILITY, TIMEOUT. + */ + if (adapter->reset_reason != VNIC_RESET_FAILOVER && + adapter->reset_reason != VNIC_RESET_MOBILITY && + adapter->reset_reason != VNIC_RESET_TIMEOUT) + send_request_unmap(adapter, ltb->map_id); + + dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); + + ltb->buff = NULL; + /* mark this map_id free */ + bitmap_clear(adapter->map_ids, ltb->map_id, 1); + ltb->map_id = 0; +} + +/** + * free_ltb_set - free the given set of long term buffers (LTBS) + * @adapter: The ibmvnic adapter containing this ltb set + * @ltb_set: The ltb_set to be freed + * + * Free the set of LTBs in the given set. 
+ */ + +static void free_ltb_set(struct ibmvnic_adapter *adapter, + struct ibmvnic_ltb_set *ltb_set) +{ + int i; + + for (i = 0; i < ltb_set->num_ltbs; i++) + free_long_term_buff(adapter, &ltb_set->ltbs[i]); + + kfree(ltb_set->ltbs); + ltb_set->ltbs = NULL; + ltb_set->num_ltbs = 0; +} + +/** + * alloc_ltb_set() - Allocate a set of long term buffers (LTBs) + * + * @adapter: ibmvnic adapter associated to the LTB + * @ltb_set: container object for the set of LTBs + * @num_buffs: Number of buffers in the LTB + * @buff_size: Size of each buffer in the LTB + * + * Allocate a set of LTBs to accommodate @num_buffs buffers of @buff_size + * each. We currently cap size each LTB to IBMVNIC_ONE_LTB_SIZE. If the + * new set of LTBs have fewer LTBs than the old set, free the excess LTBs. + * If new set needs more than in old set, allocate the remaining ones. + * Try and reuse as many LTBs as possible and avoid reallocation. + * + * Any changes to this allocation strategy must be reflected in + * map_rxpool_buff_to_ltb() and map_txpool_buff_to_ltb(). 
+ */ +static int alloc_ltb_set(struct ibmvnic_adapter *adapter, + struct ibmvnic_ltb_set *ltb_set, int num_buffs, + int buff_size) +{ + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ltb_set old_set; + struct ibmvnic_ltb_set new_set; + int rem_size; + int tot_size; /* size of all ltbs */ + int ltb_size; /* size of one ltb */ + int nltbs; + int rc; + int n; + int i; + + dev_dbg(dev, "%s() num_buffs %d, buff_size %d\n", __func__, num_buffs, + buff_size); + + ltb_size = rounddown(IBMVNIC_ONE_LTB_SIZE, buff_size); + tot_size = num_buffs * buff_size; + + if (ltb_size > tot_size) + ltb_size = tot_size; + + nltbs = tot_size / ltb_size; + if (tot_size % ltb_size) + nltbs++; + + old_set = *ltb_set; + + if (old_set.num_ltbs == nltbs) { + new_set = old_set; + } else { + int tmp = nltbs * sizeof(struct ibmvnic_long_term_buff); + + new_set.ltbs = kzalloc(tmp, GFP_KERNEL); + if (!new_set.ltbs) + return -ENOMEM; + + new_set.num_ltbs = nltbs; + + /* Free any excess ltbs in old set */ + for (i = new_set.num_ltbs; i < old_set.num_ltbs; i++) + free_long_term_buff(adapter, &old_set.ltbs[i]); + + /* Copy remaining ltbs to new set. All LTBs except the + * last one are of the same size. alloc_long_term_buff() + * will realloc if the size changes. + */ + n = min(old_set.num_ltbs, new_set.num_ltbs); + for (i = 0; i < n; i++) + new_set.ltbs[i] = old_set.ltbs[i]; + + /* Any additional ltbs in new set will have NULL ltbs for + * now and will be allocated in alloc_long_term_buff(). + */ + + /* We no longer need the old_set so free it. Note that we + * may have reused some ltbs from old set and freed excess + * ltbs above. So we only need to free the container now + * not the LTBs themselves. (i.e. dont free_ltb_set()!) + */ + kfree(old_set.ltbs); + old_set.ltbs = NULL; + old_set.num_ltbs = 0; + + /* Install the new set. If allocations fail below, we will + * retry later and know what size LTBs we need. 
+ */ + *ltb_set = new_set; + } + + i = 0; + rem_size = tot_size; + while (rem_size) { + if (ltb_size > rem_size) + ltb_size = rem_size; + + rem_size -= ltb_size; + + rc = alloc_long_term_buff(adapter, &new_set.ltbs[i], ltb_size); + if (rc) + goto out; + i++; + } + + WARN_ON(i != new_set.num_ltbs); + + return 0; +out: + /* We may have allocated one/more LTBs before failing and we + * want to try and reuse on next reset. So don't free ltb set. + */ + return rc; +} + +/** + * map_rxpool_buf_to_ltb - Map given rxpool buffer to offset in an LTB. + * @rxpool: The receive buffer pool containing buffer + * @bufidx: Index of buffer in rxpool + * @ltbp: (Output) pointer to the long term buffer containing the buffer + * @offset: (Output) offset of buffer in the LTB from @ltbp + * + * Map the given buffer identified by [rxpool, bufidx] to an LTB in the + * pool and its corresponding offset. Assume for now that each LTB is of + * different size but could possibly be optimized based on the allocation + * strategy in alloc_ltb_set(). + */ +static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool, + unsigned int bufidx, + struct ibmvnic_long_term_buff **ltbp, + unsigned int *offset) +{ + struct ibmvnic_long_term_buff *ltb; + int nbufs; /* # of buffers in one ltb */ + int i; + + WARN_ON(bufidx >= rxpool->size); + + for (i = 0; i < rxpool->ltb_set.num_ltbs; i++) { + ltb = &rxpool->ltb_set.ltbs[i]; + nbufs = ltb->size / rxpool->buff_size; + if (bufidx < nbufs) + break; + bufidx -= nbufs; + } + + *ltbp = ltb; + *offset = bufidx * rxpool->buff_size; +} + +/** + * map_txpool_buf_to_ltb - Map given txpool buffer to offset in an LTB. + * @txpool: The transmit buffer pool containing buffer + * @bufidx: Index of buffer in txpool + * @ltbp: (Output) pointer to the long term buffer (LTB) containing the buffer + * @offset: (Output) offset of buffer in the LTB from @ltbp + * + * Map the given buffer identified by [txpool, bufidx] to an LTB in the + * pool and its corresponding offset. 
+ */ +static void map_txpool_buf_to_ltb(struct ibmvnic_tx_pool *txpool, + unsigned int bufidx, + struct ibmvnic_long_term_buff **ltbp, + unsigned int *offset) +{ + struct ibmvnic_long_term_buff *ltb; + int nbufs; /* # of buffers in one ltb */ + int i; + + WARN_ON_ONCE(bufidx >= txpool->num_buffers); + + for (i = 0; i < txpool->ltb_set.num_ltbs; i++) { + ltb = &txpool->ltb_set.ltbs[i]; + nbufs = ltb->size / txpool->buf_size; + if (bufidx < nbufs) + break; + bufidx -= nbufs; + } + + *ltbp = ltb; + *offset = bufidx * txpool->buf_size; +} + +static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_active_rx_pools; i++) + adapter->rx_pool[i].active = 0; +} + +static void replenish_rx_pool(struct ibmvnic_adapter *adapter, + struct ibmvnic_rx_pool *pool) +{ + int count = pool->size - atomic_read(&pool->available); + u64 handle = adapter->rx_scrq[pool->index]->handle; + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_sub_crq_queue *rx_scrq; + struct ibmvnic_long_term_buff *ltb; + union sub_crq *sub_crq; + int buffers_added = 0; + unsigned long lpar_rc; + struct sk_buff *skb; + unsigned int offset; + dma_addr_t dma_addr; + unsigned char *dst; + int shift = 0; + int bufidx; + int i; + + if (!pool->active) + return; + + rx_scrq = adapter->rx_scrq[pool->index]; + ind_bufp = &rx_scrq->ind_buf; + + /* netdev_skb_alloc() could have failed after we saved a few skbs + * in the indir_buf and we would not have sent them to VIOS yet. + * To account for them, start the loop at ind_bufp->index rather + * than 0. If we pushed all the skbs to VIOS, ind_bufp->index will + * be 0. + */ + for (i = ind_bufp->index; i < count; ++i) { + bufidx = pool->free_map[pool->next_free]; + + /* We maybe reusing the skb from earlier resets. Allocate + * only if necessary. But since the LTB may have changed + * during reset (see init_rx_pools()), update LTB below + * even if reusing skb. 
+ */ + skb = pool->rx_buff[bufidx].skb; + if (!skb) { + skb = netdev_alloc_skb(adapter->netdev, + pool->buff_size); + if (!skb) { + dev_err(dev, "Couldn't replenish rx buff\n"); + adapter->replenish_no_mem++; + break; + } + } + + pool->free_map[pool->next_free] = IBMVNIC_INVALID_MAP; + pool->next_free = (pool->next_free + 1) % pool->size; + + /* Copy the skb to the long term mapped DMA buffer */ + map_rxpool_buf_to_ltb(pool, bufidx, &ltb, &offset); + dst = ltb->buff + offset; + memset(dst, 0, pool->buff_size); + dma_addr = ltb->addr + offset; + + /* add the skb to an rx_buff in the pool */ + pool->rx_buff[bufidx].data = dst; + pool->rx_buff[bufidx].dma = dma_addr; + pool->rx_buff[bufidx].skb = skb; + pool->rx_buff[bufidx].pool_index = pool->index; + pool->rx_buff[bufidx].size = pool->buff_size; + + /* queue the rx_buff for the next send_subcrq_indirect */ + sub_crq = &ind_bufp->indir_arr[ind_bufp->index++]; + memset(sub_crq, 0, sizeof(*sub_crq)); + sub_crq->rx_add.first = IBMVNIC_CRQ_CMD; + sub_crq->rx_add.correlator = + cpu_to_be64((u64)&pool->rx_buff[bufidx]); + sub_crq->rx_add.ioba = cpu_to_be32(dma_addr); + sub_crq->rx_add.map_id = ltb->map_id; + + /* The length field of the sCRQ is defined to be 24 bits so the + * buffer size needs to be left shifted by a byte before it is + * converted to big endian to prevent the last byte from being + * truncated. 
+ */ +#ifdef __LITTLE_ENDIAN__ + shift = 8; +#endif + sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift); + + /* if send_subcrq_indirect queue is full, flush to VIOS */ + if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS || + i == count - 1) { + lpar_rc = + send_subcrq_indirect(adapter, handle, + (u64)ind_bufp->indir_dma, + (u64)ind_bufp->index); + if (lpar_rc != H_SUCCESS) + goto failure; + buffers_added += ind_bufp->index; + adapter->replenish_add_buff_success += ind_bufp->index; + ind_bufp->index = 0; + } + } + atomic_add(buffers_added, &pool->available); + return; + +failure: + if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED) + dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n"); + for (i = ind_bufp->index - 1; i >= 0; --i) { + struct ibmvnic_rx_buff *rx_buff; + + pool->next_free = pool->next_free == 0 ? + pool->size - 1 : pool->next_free - 1; + sub_crq = &ind_bufp->indir_arr[i]; + rx_buff = (struct ibmvnic_rx_buff *) + be64_to_cpu(sub_crq->rx_add.correlator); + bufidx = (int)(rx_buff - pool->rx_buff); + pool->free_map[pool->next_free] = bufidx; + dev_kfree_skb_any(pool->rx_buff[bufidx].skb); + pool->rx_buff[bufidx].skb = NULL; + } + adapter->replenish_add_buff_failure += ind_bufp->index; + atomic_add(buffers_added, &pool->available); + ind_bufp->index = 0; + if (lpar_rc == H_CLOSED || adapter->failover_pending) { + /* Disable buffer pool replenishment and report carrier off if + * queue is closed or pending failover. + * Firmware guarantees that a signal will be sent to the + * driver, triggering a reset. 
+ */ + deactivate_rx_pools(adapter); + netif_carrier_off(adapter->netdev); + } +} + +static void replenish_pools(struct ibmvnic_adapter *adapter) +{ + int i; + + adapter->replenish_task_cycles++; + for (i = 0; i < adapter->num_active_rx_pools; i++) { + if (adapter->rx_pool[i].active) + replenish_rx_pool(adapter, &adapter->rx_pool[i]); + } + + netdev_dbg(adapter->netdev, "Replenished %d pools\n", i); +} + +static void release_stats_buffers(struct ibmvnic_adapter *adapter) +{ + kfree(adapter->tx_stats_buffers); + kfree(adapter->rx_stats_buffers); + adapter->tx_stats_buffers = NULL; + adapter->rx_stats_buffers = NULL; +} + +static int init_stats_buffers(struct ibmvnic_adapter *adapter) +{ + adapter->tx_stats_buffers = + kcalloc(IBMVNIC_MAX_QUEUES, + sizeof(struct ibmvnic_tx_queue_stats), + GFP_KERNEL); + if (!adapter->tx_stats_buffers) + return -ENOMEM; + + adapter->rx_stats_buffers = + kcalloc(IBMVNIC_MAX_QUEUES, + sizeof(struct ibmvnic_rx_queue_stats), + GFP_KERNEL); + if (!adapter->rx_stats_buffers) + return -ENOMEM; + + return 0; +} + +static void release_stats_token(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + + if (!adapter->stats_token) + return; + + dma_unmap_single(dev, adapter->stats_token, + sizeof(struct ibmvnic_statistics), + DMA_FROM_DEVICE); + adapter->stats_token = 0; +} + +static int init_stats_token(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + dma_addr_t stok; + int rc; + + stok = dma_map_single(dev, &adapter->stats, + sizeof(struct ibmvnic_statistics), + DMA_FROM_DEVICE); + rc = dma_mapping_error(dev, stok); + if (rc) { + dev_err(dev, "Couldn't map stats buffer, rc = %d\n", rc); + return rc; + } + + adapter->stats_token = stok; + netdev_dbg(adapter->netdev, "Stats token initialized (%llx)\n", stok); + return 0; +} + +/** + * release_rx_pools() - Release any rx pools attached to @adapter. 
+ * @adapter: ibmvnic adapter
+ *
+ * Safe to call this multiple times - even if no pools are attached.
+ */
+static void release_rx_pools(struct ibmvnic_adapter *adapter)
+{
+	int pool_idx;
+
+	if (!adapter->rx_pool)
+		return;
+
+	for (pool_idx = 0; pool_idx < adapter->num_active_rx_pools;
+	     pool_idx++) {
+		struct ibmvnic_rx_pool *pool = &adapter->rx_pool[pool_idx];
+		int buf_idx;
+
+		netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n",
+			   pool_idx);
+
+		kfree(pool->free_map);
+
+		free_ltb_set(adapter, &pool->ltb_set);
+
+		/* A pool whose buffer array was never allocated holds no
+		 * skbs, so there is nothing further to free for it.
+		 */
+		if (pool->rx_buff) {
+			for (buf_idx = 0; buf_idx < pool->size; buf_idx++) {
+				struct ibmvnic_rx_buff *buf;
+
+				buf = &pool->rx_buff[buf_idx];
+				if (!buf->skb)
+					continue;
+
+				dev_kfree_skb_any(buf->skb);
+				buf->skb = NULL;
+			}
+
+			kfree(pool->rx_buff);
+		}
+	}
+
+	/* Drop the pool array itself and forget the old geometry */
+	kfree(adapter->rx_pool);
+	adapter->rx_pool = NULL;
+	adapter->num_active_rx_pools = 0;
+	adapter->prev_rx_pool_size = 0;
+}
+
+/**
+ * reuse_rx_pools() - Check if the existing rx pools can be reused.
+ * @adapter: ibmvnic adapter
+ *
+ * Check if the existing rx pools in the adapter can be reused. The
+ * pools can be reused if the pool parameters (number of pools,
+ * number of buffers in the pool and size of each buffer) have not
+ * changed.
+ *
+ * NOTE: This assumes that all pools have the same number of buffers
+ *	 which is the case currently. If that changes, we must fix this.
+ *
+ * Return: true if the rx pools can be reused, false otherwise.
+ */
+static bool reuse_rx_pools(struct ibmvnic_adapter *adapter)
+{
+	u64 old_num_pools, new_num_pools;
+	u64 old_pool_size, new_pool_size;
+	u64 old_buff_size, new_buff_size;
+
+	if (!adapter->rx_pool)	/* nothing allocated yet, nothing to reuse */
+		return false;
+
+	old_num_pools = adapter->num_active_rx_pools;
+	new_num_pools = adapter->req_rx_queues;
+
+	old_pool_size = adapter->prev_rx_pool_size;
+	new_pool_size = adapter->req_rx_add_entries_per_subcrq;
+
+	old_buff_size = adapter->prev_rx_buf_sz;
+	new_buff_size = adapter->cur_rx_buf_sz;
+
+	if (old_buff_size != new_buff_size ||
+	    old_num_pools != new_num_pools ||
+	    old_pool_size != new_pool_size)
+		return false;
+
+	return true;
+}
+
+/**
+ * init_rx_pools() - Initialize the set of receiver pools in the adapter.
+ * @netdev: net device associated with the vnic interface
+ *
+ * Initialize the set of receiver pools in the ibmvnic adapter associated
+ * with the net_device @netdev. If possible, reuse the existing rx pools.
+ * Otherwise free any existing pools and allocate a new set of pools
+ * before initializing them.
+ *
+ * If allocating a free map or buffer array fails, every pool is released
+ * again. If alloc_ltb_set() fails, the pools are kept and only the error
+ * is returned.
+ *
+ * Return: 0 on success and negative value on error.
+ */
+static int init_rx_pools(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	struct device *dev = &adapter->vdev->dev;
+	struct ibmvnic_rx_pool *rx_pool;
+	u64 num_pools;
+	u64 pool_size;		/* # of buffers in one pool */
+	u64 buff_size;
+	int i, j, rc;
+
+	pool_size = adapter->req_rx_add_entries_per_subcrq;
+	num_pools = adapter->req_rx_queues;
+	buff_size = adapter->cur_rx_buf_sz;
+
+	if (reuse_rx_pools(adapter)) {
+		dev_dbg(dev, "Reusing rx pools\n");
+		goto update_ltb;
+	}
+
+	/* Allocate/populate the pools. */
+	release_rx_pools(adapter);
+
+	adapter->rx_pool = kcalloc(num_pools,
+				   sizeof(struct ibmvnic_rx_pool),
+				   GFP_KERNEL);
+	if (!adapter->rx_pool) {
+		dev_err(dev, "Failed to allocate rx pools\n");
+		return -ENOMEM;
+	}
+
+	/* Set num_active_rx_pools early. If we fail below after partial
+	 * allocation, release_rx_pools() will know how many to look for.
+	 */
+	adapter->num_active_rx_pools = num_pools;
+
+	for (i = 0; i < num_pools; i++) {
+		rx_pool = &adapter->rx_pool[i];
+
+		netdev_dbg(adapter->netdev,
+			   "Initializing rx_pool[%d], %lld buffs, %lld bytes each\n",
+			   i, pool_size, buff_size);
+
+		rx_pool->size = pool_size;
+		rx_pool->index = i;
+		/* buffer size is rounded up to a multiple of L1_CACHE_BYTES */
+		rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
+
+		rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int),
+					    GFP_KERNEL);
+		if (!rx_pool->free_map) {
+			dev_err(dev, "Couldn't alloc free_map %d\n", i);
+			rc = -ENOMEM;
+			goto out_release;
+		}
+
+		rx_pool->rx_buff = kcalloc(rx_pool->size,
+					   sizeof(struct ibmvnic_rx_buff),
+					   GFP_KERNEL);
+		if (!rx_pool->rx_buff) {
+			dev_err(dev, "Couldn't alloc rx buffers\n");
+			rc = -ENOMEM;
+			goto out_release;
+		}
+	}
+
+	/* Remember the geometry so reuse_rx_pools() can compare next time */
+	adapter->prev_rx_pool_size = pool_size;
+	adapter->prev_rx_buf_sz = adapter->cur_rx_buf_sz;
+
+update_ltb:
+	for (i = 0; i < num_pools; i++) {
+		rx_pool = &adapter->rx_pool[i];
+		dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n",
+			i, rx_pool->size, rx_pool->buff_size);
+
+		rc = alloc_ltb_set(adapter, &rx_pool->ltb_set,
+				   rx_pool->size, rx_pool->buff_size);
+		if (rc)
+			goto out;
+
+		for (j = 0; j < rx_pool->size; ++j) {
+			struct ibmvnic_rx_buff *rx_buff;
+
+			rx_pool->free_map[j] = j;
+
+			/* NOTE: Don't clear rx_buff->skb here - will leak
+			 *	 memory! replenish_rx_pool() will reuse skbs or
+			 *	 allocate as necessary.
+			 */
+			rx_buff = &rx_pool->rx_buff[j];
+			rx_buff->dma = 0;
+			rx_buff->data = 0;
+			rx_buff->size = 0;
+			rx_buff->pool_index = 0;
+		}
+
+		/* Mark pool "empty" so replenish_rx_pools() will
+		 * update the LTB info for each buffer
+		 */
+		atomic_set(&rx_pool->available, 0);
+		rx_pool->next_alloc = 0;
+		rx_pool->next_free = 0;
+		/* replenish_rx_pool() may have called deactivate_rx_pools()
+		 * on failover. Ensure pool is active now.
+		 */
+		rx_pool->active = 1;
+	}
+	return 0;
+out_release:
+	release_rx_pools(adapter);	/* then fall through to return rc */
+out:
+	/* We failed to allocate one or more LTBs or map them on the VIOS.
+	 * Hold onto the pools and any LTBs that we did allocate/map.
+	 */
+	return rc;
+}
+/* Free the VPD data buffer and the VPD descriptor itself, then clear
+ * adapter->vpd. Does nothing when no descriptor is attached.
+ */
+static void release_vpd_data(struct ibmvnic_adapter *adapter)
+{
+	if (!adapter->vpd)
+		return;
+
+	kfree(adapter->vpd->buff);
+	kfree(adapter->vpd);
+
+	adapter->vpd = NULL;
+}
+/* Free the buffer array, the free map and the LTB set of one tx or tso
+ * pool. The pool structure itself is owned by the caller.
+ */
+static void release_one_tx_pool(struct ibmvnic_adapter *adapter,
+				struct ibmvnic_tx_pool *tx_pool)
+{
+	kfree(tx_pool->tx_buff);
+	kfree(tx_pool->free_map);
+	free_ltb_set(adapter, &tx_pool->ltb_set);
+}
+
+/**
+ * release_tx_pools() - Release any tx pools attached to @adapter.
+ * @adapter: ibmvnic adapter
+ *
+ * Safe to call this multiple times - even if no pools are attached.
+ */
+static void release_tx_pools(struct ibmvnic_adapter *adapter)
+{
+	int i;
+
+	/* init_tx_pools() ensures that ->tx_pool and ->tso_pool are
+	 * both NULL or both non-NULL. So we only need to check one.
+	 */
+	if (!adapter->tx_pool)
+		return;
+
+	for (i = 0; i < adapter->num_active_tx_pools; i++) {
+		release_one_tx_pool(adapter, &adapter->tx_pool[i]);
+		release_one_tx_pool(adapter, &adapter->tso_pool[i]);
+	}
+
+	kfree(adapter->tx_pool);
+	adapter->tx_pool = NULL;
+	kfree(adapter->tso_pool);
+	adapter->tso_pool = NULL;
+	adapter->num_active_tx_pools = 0;
+	adapter->prev_tx_pool_size = 0;
+}
+/* Allocate the buffer array and free map of one tx or tso pool with
+ * @pool_size entries, mark every entry free and record the pool
+ * geometry. @netdev is not used by this function.
+ *
+ * Return: 0 on success, -ENOMEM if either allocation fails (in which
+ * case nothing stays allocated in @tx_pool).
+ */
+static int init_one_tx_pool(struct net_device *netdev,
+			    struct ibmvnic_tx_pool *tx_pool,
+			    int pool_size, int buf_size)
+{
+	int i;
+
+	tx_pool->tx_buff = kcalloc(pool_size,
+				   sizeof(struct ibmvnic_tx_buff),
+				   GFP_KERNEL);
+	if (!tx_pool->tx_buff)
+		return -ENOMEM;
+
+	tx_pool->free_map = kcalloc(pool_size, sizeof(int), GFP_KERNEL);
+	if (!tx_pool->free_map) {
+		kfree(tx_pool->tx_buff);
+		tx_pool->tx_buff = NULL;
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < pool_size; i++)
+		tx_pool->free_map[i] = i;
+
+	tx_pool->consumer_index = 0;
+	tx_pool->producer_index = 0;
+	tx_pool->num_buffers = pool_size;
+	tx_pool->buf_size = buf_size;
+
+	return 0;
+}
+
+/**
+ * reuse_tx_pools() - Check if the existing tx pools can be reused.
+ * @adapter: ibmvnic adapter
+ *
+ * Check if the existing tx pools in the adapter can be reused. The
+ * pools can be reused if the pool parameters (number of pools,
+ * number of buffers in the pool and mtu) have not changed.
+ *
+ * NOTE: This assumes that all pools have the same number of buffers
+ *	 which is the case currently. If that changes, we must fix this.
+ *
+ * Return: true if the tx pools can be reused, false otherwise.
+ */
+static bool reuse_tx_pools(struct ibmvnic_adapter *adapter)
+{
+	u64 old_num_pools, new_num_pools;
+	u64 old_pool_size, new_pool_size;
+	u64 old_mtu, new_mtu;
+
+	if (!adapter->tx_pool)	/* nothing allocated yet, nothing to reuse */
+		return false;
+
+	old_num_pools = adapter->num_active_tx_pools;
+	new_num_pools = adapter->num_active_tx_scrqs;
+	old_pool_size = adapter->prev_tx_pool_size;
+	new_pool_size = adapter->req_tx_entries_per_subcrq;
+	old_mtu = adapter->prev_mtu;
+	new_mtu = adapter->req_mtu;
+
+	if (old_mtu != new_mtu ||
+	    old_num_pools != new_num_pools ||
+	    old_pool_size != new_pool_size)
+		return false;
+
+	return true;
+}
+
+/**
+ * init_tx_pools() - Initialize the set of transmit pools in the adapter.
+ * @netdev: net device associated with the vnic interface
+ *
+ * Initialize the set of transmit pools in the ibmvnic adapter associated
+ * with the net_device @netdev. If possible, reuse the existing tx pools.
+ * Otherwise free any existing pools and allocate a new set of pools
+ * before initializing them.
+ *
+ * If init_one_tx_pool() fails, every pool is released again. If
+ * alloc_ltb_set() fails, the pools are kept and only the error is
+ * returned.
+ *
+ * Return: 0 on success and negative value on error.
+ */
+static int init_tx_pools(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	struct device *dev = &adapter->vdev->dev;
+	int num_pools;
+	u64 pool_size;		/* # of buffers in pool */
+	u64 buff_size;
+	int i, j, rc;
+
+	/* NOTE(review): the reuse path below iterates req_tx_queues pools
+	 * while the allocation path uses num_active_tx_scrqs; presumably
+	 * the two are equal whenever pools are reused - confirm.
+	 */
+	num_pools = adapter->req_tx_queues;
+
+	/* We must notify the VIOS about the LTB on all resets - but we only
+	 * need to alloc/populate pools if either the number of buffers or
+	 * size of each buffer in the pool has changed.
+	 */
+	if (reuse_tx_pools(adapter)) {
+		netdev_dbg(netdev, "Reusing tx pools\n");
+		goto update_ltb;
+	}
+
+	/* Allocate/populate the pools. */
+	release_tx_pools(adapter);
+
+	pool_size = adapter->req_tx_entries_per_subcrq;
+	num_pools = adapter->num_active_tx_scrqs;
+
+	adapter->tx_pool = kcalloc(num_pools,
+				   sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
+	if (!adapter->tx_pool)
+		return -ENOMEM;
+
+	adapter->tso_pool = kcalloc(num_pools,
+				    sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
+	/* To simplify release_tx_pools() ensure that ->tx_pool and
+	 * ->tso_pool are either both NULL or both non-NULL.
+	 */
+	if (!adapter->tso_pool) {
+		kfree(adapter->tx_pool);
+		adapter->tx_pool = NULL;
+		return -ENOMEM;
+	}
+
+	/* Set num_active_tx_pools early. If we fail below after partial
+	 * allocation, release_tx_pools() will know how many to look for.
+	 */
+	adapter->num_active_tx_pools = num_pools;
+
+	/* tx buffer: MTU plus VLAN header, rounded up to L1_CACHE_BYTES */
+	buff_size = adapter->req_mtu + VLAN_HLEN;
+	buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
+
+	for (i = 0; i < num_pools; i++) {
+		dev_dbg(dev, "Init tx pool %d [%llu, %llu]\n",
+			i, adapter->req_tx_entries_per_subcrq, buff_size);
+
+		rc = init_one_tx_pool(netdev, &adapter->tx_pool[i],
+				      pool_size, buff_size);
+		if (rc)
+			goto out_release;
+
+		rc = init_one_tx_pool(netdev, &adapter->tso_pool[i],
+				      IBMVNIC_TSO_BUFS,
+				      IBMVNIC_TSO_BUF_SZ);
+		if (rc)
+			goto out_release;
+	}
+
+	/* Remember the geometry so reuse_tx_pools() can compare next time */
+	adapter->prev_tx_pool_size = pool_size;
+	adapter->prev_mtu = adapter->req_mtu;
+
+update_ltb:
+	/* NOTE: All tx_pools have the same number of buffers (which is
+	 *       same as pool_size). All tso_pools have IBMVNIC_TSO_BUFS
+	 *       buffers (see calls init_one_tx_pool() for these).
+	 *       For consistency, we use tx_pool->num_buffers and
+	 *       tso_pool->num_buffers below.
+	 */
+	rc = -1;
+	for (i = 0; i < num_pools; i++) {
+		struct ibmvnic_tx_pool *tso_pool;
+		struct ibmvnic_tx_pool *tx_pool;
+
+		tx_pool = &adapter->tx_pool[i];
+
+		dev_dbg(dev, "Updating LTB for tx pool %d [%d, %d]\n",
+			i, tx_pool->num_buffers, tx_pool->buf_size);
+
+		rc = alloc_ltb_set(adapter, &tx_pool->ltb_set,
+				   tx_pool->num_buffers, tx_pool->buf_size);
+		if (rc)
+			goto out;
+
+		tx_pool->consumer_index = 0;
+		tx_pool->producer_index = 0;
+
+		for (j = 0; j < tx_pool->num_buffers; j++)
+			tx_pool->free_map[j] = j;
+
+		tso_pool = &adapter->tso_pool[i];
+
+		dev_dbg(dev, "Updating LTB for tso pool %d [%d, %d]\n",
+			i, tso_pool->num_buffers, tso_pool->buf_size);
+
+		rc = alloc_ltb_set(adapter, &tso_pool->ltb_set,
+				   tso_pool->num_buffers, tso_pool->buf_size);
+		if (rc)
+			goto out;
+
+		tso_pool->consumer_index = 0;
+		tso_pool->producer_index = 0;
+
+		for (j = 0; j < tso_pool->num_buffers; j++)
+			tso_pool->free_map[j] = j;
+	}
+
+	return 0;
+out_release:
+	release_tx_pools(adapter);	/* then fall through to return rc */
+out:
+	/* We failed to allocate one or more LTBs or map them on the VIOS.
+	 * Hold onto the pools and any LTBs that we did allocate/map.
+	 */
+	return rc;
+}
+/* Enable NAPI on the first req_rx_queues contexts. The napi_enabled
+ * flag makes a repeated call a no-op.
+ */
+static void ibmvnic_napi_enable(struct ibmvnic_adapter *adapter)
+{
+	int i;
+
+	if (adapter->napi_enabled)
+		return;
+
+	for (i = 0; i < adapter->req_rx_queues; i++)
+		napi_enable(&adapter->napi[i]);
+
+	adapter->napi_enabled = true;
+}
+/* Counterpart of ibmvnic_napi_enable(); does nothing unless NAPI is
+ * currently flagged as enabled.
+ */
+static void ibmvnic_napi_disable(struct ibmvnic_adapter *adapter)
+{
+	int i;
+
+	if (!adapter->napi_enabled)
+		return;
+
+	for (i = 0; i < adapter->req_rx_queues; i++) {
+		netdev_dbg(adapter->netdev, "Disabling napi[%d]\n", i);
+		napi_disable(&adapter->napi[i]);
+	}
+
+	adapter->napi_enabled = false;
+}
+/* Allocate one NAPI context per requested rx queue and register each
+ * with ibmvnic_poll as its poll routine.
+ *
+ * Return: 0 on success, -ENOMEM if the array cannot be allocated.
+ */
+static int init_napi(struct ibmvnic_adapter *adapter)
+{
+	int i;
+
+	adapter->napi = kcalloc(adapter->req_rx_queues,
+				sizeof(struct napi_struct), GFP_KERNEL);
+	if (!adapter->napi)
+		return -ENOMEM;
+
+	for (i = 0; i < adapter->req_rx_queues; i++) {
+		netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i);
+		netif_napi_add(adapter->netdev, &adapter->napi[i],
+			       ibmvnic_poll);
+	}
+
+	/* release_napi() uses this count, not req_rx_queues */
+	adapter->num_active_rx_napi = adapter->req_rx_queues;
+	return 0;
+}
+/* Unregister and free every NAPI context set up by init_napi() and
+ * reset the related bookkeeping. Does nothing if none are allocated.
+ */
+static void release_napi(struct ibmvnic_adapter *adapter)
+{
+	int i;
+
+	if (!adapter->napi)
+		return;
+
+	for (i = 0; i < adapter->num_active_rx_napi; i++) {
+		netdev_dbg(adapter->netdev, "Releasing napi[%d]\n", i);
+		netif_napi_del(&adapter->napi[i]);
+	}
+
+	kfree(adapter->napi);
+	adapter->napi = NULL;
+	adapter->num_active_rx_napi = 0;
+	adapter->napi_enabled = false;
+}
+/* Map an adapter state to a short name for log messages; values not
+ * listed in the switch yield "UNKNOWN".
+ */
+static const char *adapter_state_to_string(enum vnic_state state)
+{
+	switch (state) {
+	case VNIC_PROBING:
+		return "PROBING";
+	case VNIC_PROBED:
+		return "PROBED";
+	case VNIC_OPENING:
+		return "OPENING";
+	case VNIC_OPEN:
+		return "OPEN";
+	case VNIC_CLOSING:
+		return "CLOSING";
+	case VNIC_CLOSED:
+		return "CLOSED";
+	case VNIC_REMOVING:
+		return "REMOVING";
+	case VNIC_REMOVED:
+		return "REMOVED";
+	case VNIC_DOWN:
+		return "DOWN";
+	}
+	return "UNKNOWN";
+}
+/* Send a login request and wait for the reply, retrying on aborted,
+ * partially successful, failed or timed-out attempts.
+ */
+static int ibmvnic_login(struct net_device *netdev)
+{
+	unsigned long flags, timeout = msecs_to_jiffies(20000);
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int retry_count = 0;
+	int retries = 10;
+	bool retry;
+	int rc;
+
+	do {
+		retry = false;
+		if (retry_count > retries) {
+			netdev_warn(netdev, "Login attempts exceeded\n");
+			return -EACCES;
+		}
+
+		adapter->init_done_rc = 0;
+		reinit_completion(&adapter->init_done);
+		rc = send_login(adapter);
+		if (rc)
+			return rc;
+
+		if (!wait_for_completion_timeout(&adapter->init_done,
+						 timeout)) {
+			netdev_warn(netdev, "Login timed out\n");
+			adapter->login_pending = false;
+			goto partial_reset;	/* jumps into the last branch below */
+		}
+
+		if (adapter->init_done_rc == ABORTED) {
+			netdev_warn(netdev, "Login aborted, retrying...\n");
+			retry = true;
+			adapter->init_done_rc = 0;
+			retry_count++;
+			/* FW or device may be busy, so
+			 * wait a bit before retrying login
+			 */
+			msleep(500);
+		} else if (adapter->init_done_rc == PARTIALSUCCESS) {
+			retry_count++;
+			release_sub_crqs(adapter, 1);
+
+			retry = true;
+			netdev_dbg(netdev,
+				   "Received partial success, retrying...\n");
+			adapter->init_done_rc = 0;
+			reinit_completion(&adapter->init_done);
+			send_query_cap(adapter);
+			if (!wait_for_completion_timeout(&adapter->init_done,
+							 timeout)) {
+				netdev_warn(netdev,
+					    "Capabilities query timed out\n");
+				return -ETIMEDOUT;
+			}
+
+			rc = init_sub_crqs(adapter);
+			if (rc) {
+				netdev_warn(netdev,
+					    "SCRQ initialization failed\n");
+				return rc;
+			}
+
+			rc = init_sub_crq_irqs(adapter);
+			if (rc) {
+				netdev_warn(netdev,
+					    "SCRQ irq initialization failed\n");
+				return rc;
+			}
+		/* Default/timeout error handling, reset and start fresh */
+		} else if (adapter->init_done_rc) {
+			netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
+				    adapter->init_done_rc);
+
+partial_reset:
+			/* adapter login failed, so free any CRQs or sub-CRQs
+			 * and register again before attempting to login again.
+			 * If we don't do this then the VIOS may think that
+			 * we are already logged in and reject any subsequent
+			 * attempts
+			 */
+			netdev_warn(netdev,
+				    "Freeing and re-registering CRQs before attempting to login again\n");
+			retry = true;
+			adapter->init_done_rc = 0;
+			release_sub_crqs(adapter, true);
+			/* Much of this is similar logic as ibmvnic_probe(),
+			 * we are essentially re-initializing communication
+			 * with the server. We really should not run any
+			 * resets/failovers here because this is already a form
+			 * of reset and we do not want parallel resets occurring
+			 */
+			do {
+				reinit_init_done(adapter);
+				/* Clear any failovers we got in the previous
+				 * pass since we are re-initializing the CRQ
+				 */
+				adapter->failover_pending = false;
+				release_crq_queue(adapter);
+				/* If we don't sleep here then we risk an
+				 * unnecessary failover event from the VIOS.
+				 * This is a known VIOS issue caused by a vnic
+				 * device freeing and registering a CRQ too
+				 * quickly.
+				 */
+				msleep(1500);
+				/* Avoid any resets, since we are currently
+				 * resetting.
+				 */
+				spin_lock_irqsave(&adapter->rwi_lock, flags);
+				flush_reset_queue(adapter);
+				spin_unlock_irqrestore(&adapter->rwi_lock,
+						       flags);
+
+				rc = init_crq_queue(adapter);
+				if (rc) {
+					netdev_err(netdev, "login recovery: init CRQ failed %d\n",
+						   rc);
+					return -EIO;
+				}
+
+				rc = ibmvnic_reset_init(adapter, false);
+				if (rc)
+					netdev_err(netdev, "login recovery: Reset init failed %d\n",
+						   rc);
+				/* IBMVNIC_CRQ_INIT will return EAGAIN if it
+				 * fails, since ibmvnic_reset_init will free
+				 * irq's in failure, we won't be able to receive
+				 * new CRQs so we need to keep trying. probe()
+				 * handles this similarly.
+				 */
+			} while (rc == -EAGAIN && retry_count++ < retries);
+		}
+	} while (retry);
+
+	__ibmvnic_set_mac(netdev, adapter->mac_addr);
+
+	netdev_dbg(netdev, "[S:%s] Login succeeded\n", adapter_state_to_string(adapter->state));
+	return 0;
+}
+/* Unmap and free the login request buffer, if one is allocated. */
+static void release_login_buffer(struct ibmvnic_adapter *adapter)
+{
+	if (!adapter->login_buf)
+		return;
+
+	dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
+			 adapter->login_buf_sz, DMA_TO_DEVICE);
+	kfree(adapter->login_buf);
+	adapter->login_buf = NULL;
+}
+/* Unmap and free the login response buffer, if one is allocated. */
+static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
+{
+	if (!adapter->login_rsp_buf)
+		return;
+
+	dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
+			 adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
+	kfree(adapter->login_rsp_buf);
+	adapter->login_rsp_buf = NULL;
+}
+/* Release the VPD data, the NAPI contexts and both login buffers.
+ * The rx/tx pools are not touched here.
+ */
+static void release_resources(struct ibmvnic_adapter *adapter)
+{
+	release_vpd_data(adapter);
+
+	release_napi(adapter);
+	release_login_buffer(adapter);
+	release_login_rsp_buffer(adapter);
+}
+/* Send a LOGICAL_LINK_STATE command carrying @link_state and wait up to
+ * 20 seconds for init_done; the command is re-sent for as long as the
+ * reply is PARTIALSUCCESS.
+ */
+static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
+{
+	struct net_device *netdev = adapter->netdev;
+	unsigned long timeout = msecs_to_jiffies(20000);
+	union ibmvnic_crq crq;
+	bool resend;
+	int rc;
+
+	netdev_dbg(netdev, "setting link state %d\n", link_state);
+
+	memset(&crq, 0, sizeof(crq));
+	crq.logical_link_state.first = IBMVNIC_CRQ_CMD;
+	crq.logical_link_state.cmd = LOGICAL_LINK_STATE;
+	crq.logical_link_state.link_state = link_state;
+
+	do {
+		resend = false;
+
+		reinit_completion(&adapter->init_done);
+		rc = ibmvnic_send_crq(adapter, &crq);
+		if (rc) {
+			netdev_err(netdev, "Failed to set link state\n");
+			return rc;
+		}
+
+		if (!wait_for_completion_timeout(&adapter->init_done,
+						 timeout)) {
+			netdev_err(netdev, "timeout setting link state\n");
+			return -ETIMEDOUT;
+		}
+
+		if (adapter->init_done_rc == PARTIALSUCCESS) {
+			/* Partial success, delay and re-send */
+			mdelay(1000);
+			resend = true;
+		} else if
(adapter->init_done_rc) {
+			netdev_warn(netdev, "Unable to set link state, rc=%d\n",
+				    adapter->init_done_rc);
+			return adapter->init_done_rc;
+		}
+	} while (resend);
+
+	return 0;
+}
+
+/* Tell the network stack how many tx and rx queues are really in use,
+ * taken from adapter->req_tx_queues and adapter->req_rx_queues.
+ *
+ * Return: 0 on success, otherwise the error from the failing
+ * netif_set_real_num_{tx,rx}_queues() call.
+ */
+static int set_real_num_queues(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int rc;
+
+	netdev_dbg(netdev, "Setting real tx/rx queues (%llx/%llx)\n",
+		   adapter->req_tx_queues, adapter->req_rx_queues);
+
+	rc = netif_set_real_num_tx_queues(netdev, adapter->req_tx_queues);
+	if (rc) {
+		netdev_err(netdev, "failed to set the number of tx queues\n");
+		return rc;
+	}
+
+	rc = netif_set_real_num_rx_queues(netdev, adapter->req_rx_queues);
+	if (rc)
+		netdev_err(netdev, "failed to set the number of rx queues\n");
+
+	return rc;
+}
+
+/* Fetch the adapter's Vital Product Data into adapter->vpd->buff.
+ *
+ * Two firmware requests are issued under fw_lock: GET_VPD_SIZE, after
+ * which adapter->vpd->len is expected to hold the size, and GET_VPD,
+ * which hands the DMA-mapped buffer to the firmware. The buffer is
+ * (re)allocated in between if it is missing or the size changed.
+ *
+ * Return: 0 on success, -ENODATA if the reported size is zero, -ENOMEM
+ * if the buffer cannot be allocated or mapped, otherwise the error from
+ * sending or waiting for a request. On any failure after the size query
+ * adapter->vpd->buff is left NULL.
+ */
+static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
+{
+	struct device *dev = &adapter->vdev->dev;
+	union ibmvnic_crq crq;
+	int len = 0;
+	int rc;
+
+	/* Remember the size of an existing buffer to detect a change */
+	if (adapter->vpd->buff)
+		len = adapter->vpd->len;
+
+	mutex_lock(&adapter->fw_lock);
+	adapter->fw_done_rc = 0;
+	reinit_completion(&adapter->fw_done);
+
+	/* Zero the descriptor so no stack garbage is sent to the firmware,
+	 * as set_link_state() does for its CRQ.
+	 */
+	memset(&crq, 0, sizeof(crq));
+	crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
+	crq.get_vpd_size.cmd = GET_VPD_SIZE;
+	rc = ibmvnic_send_crq(adapter, &crq);
+	if (rc) {
+		mutex_unlock(&adapter->fw_lock);
+		return rc;
+	}
+
+	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
+	if (rc) {
+		dev_err(dev, "Could not retrieve VPD size, rc = %d\n", rc);
+		mutex_unlock(&adapter->fw_lock);
+		return rc;
+	}
+	mutex_unlock(&adapter->fw_lock);
+
+	if (!adapter->vpd->len)
+		return -ENODATA;
+
+	if (!adapter->vpd->buff) {
+		adapter->vpd->buff = kzalloc(adapter->vpd->len, GFP_KERNEL);
+	} else if (adapter->vpd->len != len) {
+		void *new_buff = krealloc(adapter->vpd->buff,
+					  adapter->vpd->len, GFP_KERNEL);
+
+		/* krealloc() keeps the old buffer alive on failure; free it
+		 * here instead of losing the only pointer to it.
+		 */
+		if (!new_buff)
+			kfree(adapter->vpd->buff);
+		adapter->vpd->buff = new_buff;
+	}
+
+	if (!adapter->vpd->buff) {
+		dev_err(dev, "Could not allocate VPD buffer\n");
+		return -ENOMEM;
+	}
+
+	adapter->vpd->dma_addr =
+		dma_map_single(dev, adapter->vpd->buff, adapter->vpd->len,
+			       DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, adapter->vpd->dma_addr)) {
+		dev_err(dev, "Could not map VPD buffer\n");
+		kfree(adapter->vpd->buff);
+		adapter->vpd->buff = NULL;
+		return -ENOMEM;
+	}
+
+	mutex_lock(&adapter->fw_lock);
+	adapter->fw_done_rc = 0;
+	reinit_completion(&adapter->fw_done);
+
+	memset(&crq, 0, sizeof(crq));
+	crq.get_vpd.first = IBMVNIC_CRQ_CMD;
+	crq.get_vpd.cmd = GET_VPD;
+	crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr);
+	crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len);
+	rc = ibmvnic_send_crq(adapter, &crq);
+	if (rc) {
+		/* The request never reached the firmware, so nobody else
+		 * will release the mapping created above.
+		 */
+		dma_unmap_single(dev, adapter->vpd->dma_addr,
+				 adapter->vpd->len, DMA_FROM_DEVICE);
+		kfree(adapter->vpd->buff);
+		adapter->vpd->buff = NULL;
+		mutex_unlock(&adapter->fw_lock);
+		return rc;
+	}
+
+	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
+	if (rc) {
+		/* NOTE(review): the mapping is not released on this path;
+		 * presumably the GET_VPD response handler unmaps it -
+		 * confirm before adding an unmap here.
+		 */
+		dev_err(dev, "Unable to retrieve VPD, rc = %d\n", rc);
+		kfree(adapter->vpd->buff);
+		adapter->vpd->buff = NULL;
+		mutex_unlock(&adapter->fw_lock);
+		return rc;
+	}
+
+	mutex_unlock(&adapter->fw_lock);
+	return 0;
+}
+
+/* Set up everything an open needs after login: real queue counts, VPD,
+ * NAPI contexts, the query-map request and the rx/tx pools.
+ *
+ * Nothing is unwound here on failure; ibmvnic_open() releases the
+ * resources when this returns an error.
+ *
+ * Return: 0 on success or the first error encountered.
+ */
+static int init_resources(struct ibmvnic_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int rc;
+
+	rc = set_real_num_queues(netdev);
+	if (rc)
+		return rc;
+
+	adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
+	if (!adapter->vpd)
+		return -ENOMEM;
+
+	/* Vital Product Data (VPD) */
+	rc = ibmvnic_get_vpd(adapter);
+	if (rc) {
+		netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n");
+		return rc;
+	}
+
+	rc = init_napi(adapter);
+	if (rc)
+		return rc;
+
+	send_query_map(adapter);
+
+	rc = init_rx_pools(netdev);
+	if (rc)
+		return rc;
+
+	rc = init_tx_pools(netdev);
+	return rc;
+}
+
+static int __ibmvnic_open(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	enum vnic_state prev_state = adapter->state;
+	int i, rc;
+
+	adapter->state = VNIC_OPENING;
+	replenish_pools(adapter);
+	ibmvnic_napi_enable(adapter);
+
+	/* We're ready to receive frames, enable the sub-crq interrupts and
+	 * set the logical link state to up
+	 */
+	for (i = 0; i < adapter->req_rx_queues;
i++) { + netdev_dbg(netdev, "Enabling rx_scrq[%d] irq\n", i); + if (prev_state == VNIC_CLOSED) + enable_irq(adapter->rx_scrq[i]->irq); + enable_scrq_irq(adapter, adapter->rx_scrq[i]); + } + + for (i = 0; i < adapter->req_tx_queues; i++) { + netdev_dbg(netdev, "Enabling tx_scrq[%d] irq\n", i); + if (prev_state == VNIC_CLOSED) + enable_irq(adapter->tx_scrq[i]->irq); + enable_scrq_irq(adapter, adapter->tx_scrq[i]); + /* netdev_tx_reset_queue will reset dql stats. During NON_FATAL + * resets, don't reset the stats because there could be batched + * skb's waiting to be sent. If we reset dql stats, we risk + * num_completed being greater than num_queued. This will cause + * a BUG_ON in dql_completed(). + */ + if (adapter->reset_reason != VNIC_RESET_NON_FATAL) + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); + } + + rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); + if (rc) { + ibmvnic_napi_disable(adapter); + ibmvnic_disable_irqs(adapter); + return rc; + } + + adapter->tx_queues_active = true; + + /* Since queues were stopped until now, there shouldn't be any + * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we + * don't need the synchronize_rcu()? Leaving it for consistency + * with setting ->tx_queues_active = false. + */ + synchronize_rcu(); + + netif_tx_start_all_queues(netdev); + + if (prev_state == VNIC_CLOSED) { + for (i = 0; i < adapter->req_rx_queues; i++) + napi_schedule(&adapter->napi[i]); + } + + adapter->state = VNIC_OPEN; + return rc; +} + +static int ibmvnic_open(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int rc; + + ASSERT_RTNL(); + + /* If device failover is pending or we are about to reset, just set + * device state and return. Device operation will be handled by reset + * routine. + * + * It should be safe to overwrite the adapter->state here. Since + * we hold the rtnl, either the reset has not actually started or + * the rtnl got dropped during the set_link_state() in do_reset(). 
+ * In the former case, no one else is changing the state (again we + * have the rtnl) and in the latter case, do_reset() will detect and + * honor our setting below. + */ + if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) { + netdev_dbg(netdev, "[S:%s FOP:%d] Resetting, deferring open\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending); + adapter->state = VNIC_OPEN; + rc = 0; + goto out; + } + + if (adapter->state != VNIC_CLOSED) { + rc = ibmvnic_login(netdev); + if (rc) + goto out; + + rc = init_resources(adapter); + if (rc) { + netdev_err(netdev, "failed to initialize resources\n"); + goto out; + } + } + + rc = __ibmvnic_open(netdev); + +out: + /* If open failed and there is a pending failover or in-progress reset, + * set device state and return. Device operation will be handled by + * reset routine. See also comments above regarding rtnl. + */ + if (rc && + (adapter->failover_pending || (test_bit(0, &adapter->resetting)))) { + adapter->state = VNIC_OPEN; + rc = 0; + } + + if (rc) { + release_resources(adapter); + release_rx_pools(adapter); + release_tx_pools(adapter); + } + + return rc; +} + +static void clean_rx_pools(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_rx_pool *rx_pool; + struct ibmvnic_rx_buff *rx_buff; + u64 rx_entries; + int rx_scrqs; + int i, j; + + if (!adapter->rx_pool) + return; + + rx_scrqs = adapter->num_active_rx_pools; + rx_entries = adapter->req_rx_add_entries_per_subcrq; + + /* Free any remaining skbs in the rx buffer pools */ + for (i = 0; i < rx_scrqs; i++) { + rx_pool = &adapter->rx_pool[i]; + if (!rx_pool || !rx_pool->rx_buff) + continue; + + netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i); + for (j = 0; j < rx_entries; j++) { + rx_buff = &rx_pool->rx_buff[j]; + if (rx_buff && rx_buff->skb) { + dev_kfree_skb_any(rx_buff->skb); + rx_buff->skb = NULL; + } + } + } +} + +static void clean_one_tx_pool(struct ibmvnic_adapter *adapter, + struct ibmvnic_tx_pool *tx_pool) +{ + 
struct ibmvnic_tx_buff *tx_buff; + u64 tx_entries; + int i; + + if (!tx_pool || !tx_pool->tx_buff) + return; + + tx_entries = tx_pool->num_buffers; + + for (i = 0; i < tx_entries; i++) { + tx_buff = &tx_pool->tx_buff[i]; + if (tx_buff && tx_buff->skb) { + dev_kfree_skb_any(tx_buff->skb); + tx_buff->skb = NULL; + } + } +} + +static void clean_tx_pools(struct ibmvnic_adapter *adapter) +{ + int tx_scrqs; + int i; + + if (!adapter->tx_pool || !adapter->tso_pool) + return; + + tx_scrqs = adapter->num_active_tx_pools; + + /* Free any remaining skbs in the tx buffer pools */ + for (i = 0; i < tx_scrqs; i++) { + netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i); + clean_one_tx_pool(adapter, &adapter->tx_pool[i]); + clean_one_tx_pool(adapter, &adapter->tso_pool[i]); + } +} + +static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int i; + + if (adapter->tx_scrq) { + for (i = 0; i < adapter->req_tx_queues; i++) + if (adapter->tx_scrq[i]->irq) { + netdev_dbg(netdev, + "Disabling tx_scrq[%d] irq\n", i); + disable_scrq_irq(adapter, adapter->tx_scrq[i]); + disable_irq(adapter->tx_scrq[i]->irq); + } + } + + if (adapter->rx_scrq) { + for (i = 0; i < adapter->req_rx_queues; i++) { + if (adapter->rx_scrq[i]->irq) { + netdev_dbg(netdev, + "Disabling rx_scrq[%d] irq\n", i); + disable_scrq_irq(adapter, adapter->rx_scrq[i]); + disable_irq(adapter->rx_scrq[i]->irq); + } + } + } +} + +static void ibmvnic_cleanup(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + /* ensure that transmissions are stopped if called by do_reset */ + + adapter->tx_queues_active = false; + + /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active + * update so they don't restart a queue after we stop it below. 
+ */ + synchronize_rcu(); + + if (test_bit(0, &adapter->resetting)) + netif_tx_disable(netdev); + else + netif_tx_stop_all_queues(netdev); + + ibmvnic_napi_disable(adapter); + ibmvnic_disable_irqs(adapter); +} + +static int __ibmvnic_close(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int rc = 0; + + adapter->state = VNIC_CLOSING; + rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); + adapter->state = VNIC_CLOSED; + return rc; +} + +static int ibmvnic_close(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int rc; + + netdev_dbg(netdev, "[S:%s FOP:%d FRR:%d] Closing\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, + adapter->force_reset_recovery); + + /* If device failover is pending, just set device state and return. + * Device operation will be handled by reset routine. + */ + if (adapter->failover_pending) { + adapter->state = VNIC_CLOSED; + return 0; + } + + rc = __ibmvnic_close(netdev); + ibmvnic_cleanup(netdev); + clean_rx_pools(adapter); + clean_tx_pools(adapter); + + return rc; +} + +/** + * build_hdr_data - creates L2/L3/L4 header data buffer + * @hdr_field: bitfield determining needed headers + * @skb: socket buffer + * @hdr_len: array of header lengths + * @hdr_data: buffer to write the header to + * + * Reads hdr_field to determine which headers are needed by firmware. + * Builds a buffer containing these headers. Saves individual header + * lengths and total buffer length to be used to build descriptors. 
+ */ +static int build_hdr_data(u8 hdr_field, struct sk_buff *skb, + int *hdr_len, u8 *hdr_data) +{ + int len = 0; + u8 *hdr; + + if (skb_vlan_tagged(skb) && !skb_vlan_tag_present(skb)) + hdr_len[0] = sizeof(struct vlan_ethhdr); + else + hdr_len[0] = sizeof(struct ethhdr); + + if (skb->protocol == htons(ETH_P_IP)) { + hdr_len[1] = ip_hdr(skb)->ihl * 4; + if (ip_hdr(skb)->protocol == IPPROTO_TCP) + hdr_len[2] = tcp_hdrlen(skb); + else if (ip_hdr(skb)->protocol == IPPROTO_UDP) + hdr_len[2] = sizeof(struct udphdr); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + hdr_len[1] = sizeof(struct ipv6hdr); + if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) + hdr_len[2] = tcp_hdrlen(skb); + else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) + hdr_len[2] = sizeof(struct udphdr); + } else if (skb->protocol == htons(ETH_P_ARP)) { + hdr_len[1] = arp_hdr_len(skb->dev); + hdr_len[2] = 0; + } + + memset(hdr_data, 0, 120); + if ((hdr_field >> 6) & 1) { + hdr = skb_mac_header(skb); + memcpy(hdr_data, hdr, hdr_len[0]); + len += hdr_len[0]; + } + + if ((hdr_field >> 5) & 1) { + hdr = skb_network_header(skb); + memcpy(hdr_data + len, hdr, hdr_len[1]); + len += hdr_len[1]; + } + + if ((hdr_field >> 4) & 1) { + hdr = skb_transport_header(skb); + memcpy(hdr_data + len, hdr, hdr_len[2]); + len += hdr_len[2]; + } + return len; +} + +/** + * create_hdr_descs - create header and header extension descriptors + * @hdr_field: bitfield determining needed headers + * @hdr_data: buffer containing header data + * @len: length of data buffer + * @hdr_len: array of individual header lengths + * @scrq_arr: descriptor array + * + * Creates header and, if needed, header extension descriptors and + * places them in a descriptor array, scrq_arr + */ + +static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, + union sub_crq *scrq_arr) +{ + union sub_crq hdr_desc; + int tmp_len = len; + int num_descs = 0; + u8 *data, *cur; + int tmp; + + while (tmp_len > 0) { + cur = hdr_data + len - tmp_len; 
+ + memset(&hdr_desc, 0, sizeof(hdr_desc)); + if (cur != hdr_data) { + data = hdr_desc.hdr_ext.data; + tmp = tmp_len > 29 ? 29 : tmp_len; + hdr_desc.hdr_ext.first = IBMVNIC_CRQ_CMD; + hdr_desc.hdr_ext.type = IBMVNIC_HDR_EXT_DESC; + hdr_desc.hdr_ext.len = tmp; + } else { + data = hdr_desc.hdr.data; + tmp = tmp_len > 24 ? 24 : tmp_len; + hdr_desc.hdr.first = IBMVNIC_CRQ_CMD; + hdr_desc.hdr.type = IBMVNIC_HDR_DESC; + hdr_desc.hdr.len = tmp; + hdr_desc.hdr.l2_len = (u8)hdr_len[0]; + hdr_desc.hdr.l3_len = cpu_to_be16((u16)hdr_len[1]); + hdr_desc.hdr.l4_len = (u8)hdr_len[2]; + hdr_desc.hdr.flag = hdr_field << 1; + } + memcpy(data, cur, tmp); + tmp_len -= tmp; + *scrq_arr = hdr_desc; + scrq_arr++; + num_descs++; + } + + return num_descs; +} + +/** + * build_hdr_descs_arr - build a header descriptor array + * @skb: tx socket buffer + * @indir_arr: indirect array + * @num_entries: number of descriptors to be sent + * @hdr_field: bit field determining which headers will be sent + * + * This function will build a TX descriptor array with applicable + * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect. + */ + +static void build_hdr_descs_arr(struct sk_buff *skb, + union sub_crq *indir_arr, + int *num_entries, u8 hdr_field) +{ + int hdr_len[3] = {0, 0, 0}; + u8 hdr_data[140] = {0}; + int tot_len; + + tot_len = build_hdr_data(hdr_field, skb, hdr_len, + hdr_data); + *num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, + indir_arr + 1); +} + +static int ibmvnic_xmit_workarounds(struct sk_buff *skb, + struct net_device *netdev) +{ + /* For some backing devices, mishandling of small packets + * can result in a loss of connection or TX stall. 
Device + * architects recommend that no packet should be smaller + * than the minimum MTU value provided to the driver, so + * pad any packets to that length + */ + if (skb->len < netdev->min_mtu) + return skb_put_padto(skb, netdev->min_mtu); + + return 0; +} + +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_tx_buff *tx_buff; + struct ibmvnic_tx_pool *tx_pool; + union sub_crq tx_scrq_entry; + int queue_num; + int entries; + int index; + int i; + + ind_bufp = &tx_scrq->ind_buf; + entries = (u64)ind_bufp->index; + queue_num = tx_scrq->pool_index; + + for (i = entries - 1; i >= 0; --i) { + tx_scrq_entry = ind_bufp->indir_arr[i]; + if (tx_scrq_entry.v1.type != IBMVNIC_TX_DESC) + continue; + index = be32_to_cpu(tx_scrq_entry.v1.correlator); + if (index & IBMVNIC_TSO_POOL_MASK) { + tx_pool = &adapter->tso_pool[queue_num]; + index &= ~IBMVNIC_TSO_POOL_MASK; + } else { + tx_pool = &adapter->tx_pool[queue_num]; + } + tx_pool->free_map[tx_pool->consumer_index] = index; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? 
+ tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_buff = &tx_pool->tx_buff[index]; + adapter->netdev->stats.tx_packets--; + adapter->netdev->stats.tx_bytes -= tx_buff->skb->len; + adapter->tx_stats_buffers[queue_num].packets--; + adapter->tx_stats_buffers[queue_num].bytes -= + tx_buff->skb->len; + dev_kfree_skb_any(tx_buff->skb); + tx_buff->skb = NULL; + adapter->netdev->stats.tx_dropped++; + } + + ind_bufp->index = 0; + + if (atomic_sub_return(entries, &tx_scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + __netif_subqueue_stopped(adapter->netdev, queue_num)) { + rcu_read_lock(); + + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, queue_num); + netdev_dbg(adapter->netdev, "Started queue %d\n", + queue_num); + } + + rcu_read_unlock(); + } +} + +static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + u64 dma_addr; + u64 entries; + u64 handle; + int rc; + + ind_bufp = &tx_scrq->ind_buf; + dma_addr = (u64)ind_bufp->indir_dma; + entries = (u64)ind_bufp->index; + handle = tx_scrq->handle; + + if (!entries) + return 0; + rc = send_subcrq_indirect(adapter, handle, dma_addr, entries); + if (rc) + ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); + else + ind_bufp->index = 0; + return 0; +} + +static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int queue_num = skb_get_queue_mapping(skb); + u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_tx_buff *tx_buff = NULL; + struct ibmvnic_sub_crq_queue *tx_scrq; + struct ibmvnic_long_term_buff *ltb; + struct ibmvnic_tx_pool *tx_pool; + unsigned int tx_send_failed = 0; + netdev_tx_t ret = NETDEV_TX_OK; + unsigned int tx_map_failed = 0; + union sub_crq indir_arr[16]; + unsigned int tx_dropped = 0; + 
unsigned int tx_packets = 0; + unsigned int tx_bytes = 0; + dma_addr_t data_dma_addr; + struct netdev_queue *txq; + unsigned long lpar_rc; + union sub_crq tx_crq; + unsigned int offset; + int num_entries = 1; + unsigned char *dst; + int bufidx = 0; + u8 proto = 0; + + /* If a reset is in progress, drop the packet since + * the scrqs may get torn down. Otherwise use the + * rcu to ensure reset waits for us to complete. + */ + rcu_read_lock(); + if (!adapter->tx_queues_active) { + dev_kfree_skb_any(skb); + + tx_send_failed++; + tx_dropped++; + ret = NETDEV_TX_OK; + goto out; + } + + tx_scrq = adapter->tx_scrq[queue_num]; + txq = netdev_get_tx_queue(netdev, queue_num); + ind_bufp = &tx_scrq->ind_buf; + + if (ibmvnic_xmit_workarounds(skb, netdev)) { + tx_dropped++; + tx_send_failed++; + ret = NETDEV_TX_OK; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); + goto out; + } + + if (skb_is_gso(skb)) + tx_pool = &adapter->tso_pool[queue_num]; + else + tx_pool = &adapter->tx_pool[queue_num]; + + bufidx = tx_pool->free_map[tx_pool->consumer_index]; + + if (bufidx == IBMVNIC_INVALID_MAP) { + dev_kfree_skb_any(skb); + tx_send_failed++; + tx_dropped++; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); + ret = NETDEV_TX_OK; + goto out; + } + + tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP; + + map_txpool_buf_to_ltb(tx_pool, bufidx, <b, &offset); + + dst = ltb->buff + offset; + memset(dst, 0, tx_pool->buf_size); + data_dma_addr = ltb->addr + offset; + + if (skb_shinfo(skb)->nr_frags) { + int cur, i; + + /* Copy the head */ + skb_copy_from_linear_data(skb, dst, skb_headlen(skb)); + cur = skb_headlen(skb); + + /* Copy the frags */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + memcpy(dst + cur, skb_frag_address(frag), + skb_frag_size(frag)); + cur += skb_frag_size(frag); + } + } else { + skb_copy_from_linear_data(skb, dst, skb->len); + } + + /* post changes to long_term_buff *dst before VIOS accessing it */ + 
dma_wmb(); + + tx_pool->consumer_index = + (tx_pool->consumer_index + 1) % tx_pool->num_buffers; + + tx_buff = &tx_pool->tx_buff[bufidx]; + tx_buff->skb = skb; + tx_buff->index = bufidx; + tx_buff->pool_index = queue_num; + + memset(&tx_crq, 0, sizeof(tx_crq)); + tx_crq.v1.first = IBMVNIC_CRQ_CMD; + tx_crq.v1.type = IBMVNIC_TX_DESC; + tx_crq.v1.n_crq_elem = 1; + tx_crq.v1.n_sge = 1; + tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED; + + if (skb_is_gso(skb)) + tx_crq.v1.correlator = + cpu_to_be32(bufidx | IBMVNIC_TSO_POOL_MASK); + else + tx_crq.v1.correlator = cpu_to_be32(bufidx); + tx_crq.v1.dma_reg = cpu_to_be16(ltb->map_id); + tx_crq.v1.sge_len = cpu_to_be32(skb->len); + tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); + + if (adapter->vlan_header_insertion && skb_vlan_tag_present(skb)) { + tx_crq.v1.flags2 |= IBMVNIC_TX_VLAN_INSERT; + tx_crq.v1.vlan_id = cpu_to_be16(skb->vlan_tci); + } + + if (skb->protocol == htons(ETH_P_IP)) { + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4; + proto = ip_hdr(skb)->protocol; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6; + proto = ipv6_hdr(skb)->nexthdr; + } + + if (proto == IPPROTO_TCP) + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP; + else if (proto == IPPROTO_UDP) + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD; + hdrs += 2; + } + if (skb_is_gso(skb)) { + tx_crq.v1.flags1 |= IBMVNIC_TX_LSO; + tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + hdrs += 2; + } + + if ((*hdrs >> 7) & 1) + build_hdr_descs_arr(skb, indir_arr, &num_entries, *hdrs); + + tx_crq.v1.n_crq_elem = num_entries; + tx_buff->num_entries = num_entries; + /* flush buffer if current entry can not fit */ + if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_flush_err; + } + + indir_arr[0] = tx_crq; + 
memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0], + num_entries * sizeof(struct ibmvnic_generic_scrq)); + ind_bufp->index += num_entries; + if (__netdev_tx_sent_queue(txq, skb->len, + netdev_xmit_more() && + ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_err; + } + + if (atomic_add_return(num_entries, &tx_scrq->used) + >= adapter->req_tx_entries_per_subcrq) { + netdev_dbg(netdev, "Stopping queue %d\n", queue_num); + netif_stop_subqueue(netdev, queue_num); + } + + tx_packets++; + tx_bytes += skb->len; + txq_trans_cond_update(txq); + ret = NETDEV_TX_OK; + goto out; + +tx_flush_err: + dev_kfree_skb_any(skb); + tx_buff->skb = NULL; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? + tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_dropped++; +tx_err: + if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER) + dev_err_ratelimited(dev, "tx: send failed\n"); + + if (lpar_rc == H_CLOSED || adapter->failover_pending) { + /* Disable TX and report carrier off if queue is closed + * or pending failover. + * Firmware guarantees that a signal will be sent to the + * driver, triggering a reset or some other action. 
+ */ + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } +out: + rcu_read_unlock(); + netdev->stats.tx_dropped += tx_dropped; + netdev->stats.tx_bytes += tx_bytes; + netdev->stats.tx_packets += tx_packets; + adapter->tx_send_failed += tx_send_failed; + adapter->tx_map_failed += tx_map_failed; + adapter->tx_stats_buffers[queue_num].packets += tx_packets; + adapter->tx_stats_buffers[queue_num].bytes += tx_bytes; + adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped; + + return ret; +} + +static void ibmvnic_set_multi(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + struct netdev_hw_addr *ha; + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.request_capability.first = IBMVNIC_CRQ_CMD; + crq.request_capability.cmd = REQUEST_CAPABILITY; + + if (netdev->flags & IFF_PROMISC) { + if (!adapter->promisc_supported) + return; + } else { + if (netdev->flags & IFF_ALLMULTI) { + /* Accept all multicast */ + memset(&crq, 0, sizeof(crq)); + crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD; + crq.multicast_ctrl.cmd = MULTICAST_CTRL; + crq.multicast_ctrl.flags = IBMVNIC_ENABLE_ALL; + ibmvnic_send_crq(adapter, &crq); + } else if (netdev_mc_empty(netdev)) { + /* Reject all multicast */ + memset(&crq, 0, sizeof(crq)); + crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD; + crq.multicast_ctrl.cmd = MULTICAST_CTRL; + crq.multicast_ctrl.flags = IBMVNIC_DISABLE_ALL; + ibmvnic_send_crq(adapter, &crq); + } else { + /* Accept one or more multicast(s) */ + netdev_for_each_mc_addr(ha, netdev) { + memset(&crq, 0, sizeof(crq)); + crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD; + crq.multicast_ctrl.cmd = MULTICAST_CTRL; + crq.multicast_ctrl.flags = IBMVNIC_ENABLE_MC; + ether_addr_copy(&crq.multicast_ctrl.mac_addr[0], + ha->addr); + ibmvnic_send_crq(adapter, &crq); + } + } + } +} + +static int __ibmvnic_set_mac(struct net_device *netdev, u8 *dev_addr) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + union ibmvnic_crq 
crq; + int rc; + + if (!is_valid_ether_addr(dev_addr)) { + rc = -EADDRNOTAVAIL; + goto err; + } + + memset(&crq, 0, sizeof(crq)); + crq.change_mac_addr.first = IBMVNIC_CRQ_CMD; + crq.change_mac_addr.cmd = CHANGE_MAC_ADDR; + ether_addr_copy(&crq.change_mac_addr.mac_addr[0], dev_addr); + + mutex_lock(&adapter->fw_lock); + adapter->fw_done_rc = 0; + reinit_completion(&adapter->fw_done); + + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + rc = -EIO; + mutex_unlock(&adapter->fw_lock); + goto err; + } + + rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); + /* netdev->dev_addr is changed in handle_change_mac_rsp function */ + if (rc || adapter->fw_done_rc) { + rc = -EIO; + mutex_unlock(&adapter->fw_lock); + goto err; + } + mutex_unlock(&adapter->fw_lock); + return 0; +err: + ether_addr_copy(adapter->mac_addr, netdev->dev_addr); + return rc; +} + +static int ibmvnic_set_mac(struct net_device *netdev, void *p) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + struct sockaddr *addr = p; + int rc; + + rc = 0; + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + ether_addr_copy(adapter->mac_addr, addr->sa_data); + if (adapter->state != VNIC_PROBED) + rc = __ibmvnic_set_mac(netdev, addr->sa_data); + + return rc; +} + +static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason) +{ + switch (reason) { + case VNIC_RESET_FAILOVER: + return "FAILOVER"; + case VNIC_RESET_MOBILITY: + return "MOBILITY"; + case VNIC_RESET_FATAL: + return "FATAL"; + case VNIC_RESET_NON_FATAL: + return "NON_FATAL"; + case VNIC_RESET_TIMEOUT: + return "TIMEOUT"; + case VNIC_RESET_CHANGE_PARAM: + return "CHANGE_PARAM"; + case VNIC_RESET_PASSIVE_INIT: + return "PASSIVE_INIT"; + } + return "UNKNOWN"; +} + +/* + * Initialize the init_done completion and return code values. We + * can get a transport event just after registering the CRQ and the + * tasklet will use this to communicate the transport event. 
To ensure + * we don't miss the notification/error, initialize these _before_ + * regisering the CRQ. + */ +static inline void reinit_init_done(struct ibmvnic_adapter *adapter) +{ + reinit_completion(&adapter->init_done); + adapter->init_done_rc = 0; +} + +/* + * do_reset returns zero if we are able to keep processing reset events, or + * non-zero if we hit a fatal error and must halt. + */ +static int do_reset(struct ibmvnic_adapter *adapter, + struct ibmvnic_rwi *rwi, u32 reset_state) +{ + struct net_device *netdev = adapter->netdev; + u64 old_num_rx_queues, old_num_tx_queues; + u64 old_num_rx_slots, old_num_tx_slots; + int rc; + + netdev_dbg(adapter->netdev, + "[S:%s FOP:%d] Reset reason: %s, reset_state: %s\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, + reset_reason_to_string(rwi->reset_reason), + adapter_state_to_string(reset_state)); + + adapter->reset_reason = rwi->reset_reason; + /* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */ + if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) + rtnl_lock(); + + /* Now that we have the rtnl lock, clear any pending failover. + * This will ensure ibmvnic_open() has either completed or will + * block until failover is complete. 
+ */ + if (rwi->reset_reason == VNIC_RESET_FAILOVER) + adapter->failover_pending = false; + + /* read the state and check (again) after getting rtnl */ + reset_state = adapter->state; + + if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { + rc = -EBUSY; + goto out; + } + + netif_carrier_off(netdev); + + old_num_rx_queues = adapter->req_rx_queues; + old_num_tx_queues = adapter->req_tx_queues; + old_num_rx_slots = adapter->req_rx_add_entries_per_subcrq; + old_num_tx_slots = adapter->req_tx_entries_per_subcrq; + + ibmvnic_cleanup(netdev); + + if (reset_state == VNIC_OPEN && + adapter->reset_reason != VNIC_RESET_MOBILITY && + adapter->reset_reason != VNIC_RESET_FAILOVER) { + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = __ibmvnic_close(netdev); + if (rc) + goto out; + } else { + adapter->state = VNIC_CLOSING; + + /* Release the RTNL lock before link state change and + * re-acquire after the link state change to allow + * linkwatch_event to grab the RTNL lock and run during + * a reset. + */ + rtnl_unlock(); + rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); + rtnl_lock(); + if (rc) + goto out; + + if (adapter->state == VNIC_OPEN) { + /* When we dropped rtnl, ibmvnic_open() got + * it and noticed that we are resetting and + * set the adapter state to OPEN. Update our + * new "target" state, and resume the reset + * from VNIC_CLOSING state. 
+ */ + netdev_dbg(netdev, + "Open changed state from %s, updating.\n", + adapter_state_to_string(reset_state)); + reset_state = VNIC_OPEN; + adapter->state = VNIC_CLOSING; + } + + if (adapter->state != VNIC_CLOSING) { + /* If someone else changed the adapter state + * when we dropped the rtnl, fail the reset + */ + rc = -EAGAIN; + goto out; + } + adapter->state = VNIC_CLOSED; + } + } + + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + release_resources(adapter); + release_sub_crqs(adapter, 1); + release_crq_queue(adapter); + } + + if (adapter->reset_reason != VNIC_RESET_NON_FATAL) { + /* remove the closed state so when we call open it appears + * we are coming from the probed state. + */ + adapter->state = VNIC_PROBED; + + reinit_init_done(adapter); + + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = init_crq_queue(adapter); + } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) { + rc = ibmvnic_reenable_crq_queue(adapter); + release_sub_crqs(adapter, 1); + } else { + rc = ibmvnic_reset_crq(adapter); + if (rc == H_CLOSED || rc == H_SUCCESS) { + rc = vio_enable_interrupts(adapter->vdev); + if (rc) + netdev_err(adapter->netdev, + "Reset failed to enable interrupts. rc=%d\n", + rc); + } + } + + if (rc) { + netdev_err(adapter->netdev, + "Reset couldn't initialize crq. rc=%d\n", rc); + goto out; + } + + rc = ibmvnic_reset_init(adapter, true); + if (rc) + goto out; + + /* If the adapter was in PROBE or DOWN state prior to the reset, + * exit here. 
+ */ + if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) { + rc = 0; + goto out; + } + + rc = ibmvnic_login(netdev); + if (rc) + goto out; + + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = init_resources(adapter); + if (rc) + goto out; + } else if (adapter->req_rx_queues != old_num_rx_queues || + adapter->req_tx_queues != old_num_tx_queues || + adapter->req_rx_add_entries_per_subcrq != + old_num_rx_slots || + adapter->req_tx_entries_per_subcrq != + old_num_tx_slots || + !adapter->rx_pool || + !adapter->tso_pool || + !adapter->tx_pool) { + release_napi(adapter); + release_vpd_data(adapter); + + rc = init_resources(adapter); + if (rc) + goto out; + + } else { + rc = init_tx_pools(netdev); + if (rc) { + netdev_dbg(netdev, + "init tx pools failed (%d)\n", + rc); + goto out; + } + + rc = init_rx_pools(netdev); + if (rc) { + netdev_dbg(netdev, + "init rx pools failed (%d)\n", + rc); + goto out; + } + } + ibmvnic_disable_irqs(adapter); + } + adapter->state = VNIC_CLOSED; + + if (reset_state == VNIC_CLOSED) { + rc = 0; + goto out; + } + + rc = __ibmvnic_open(netdev); + if (rc) { + rc = IBMVNIC_OPEN_FAILED; + goto out; + } + + /* refresh device's multicast list */ + ibmvnic_set_multi(netdev); + + if (adapter->reset_reason == VNIC_RESET_FAILOVER || + adapter->reset_reason == VNIC_RESET_MOBILITY) + __netdev_notify_peers(netdev); + + rc = 0; + +out: + /* restore the adapter state if reset failed */ + if (rc) + adapter->state = reset_state; + /* requestor of VNIC_RESET_CHANGE_PARAM should still hold the rtnl lock */ + if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) + rtnl_unlock(); + + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); + return rc; +} + +static int do_hard_reset(struct ibmvnic_adapter *adapter, + struct ibmvnic_rwi *rwi, u32 reset_state) +{ + struct net_device *netdev = adapter->netdev; + int rc; + + netdev_dbg(adapter->netdev, "Hard 
resetting driver (%s)\n", + reset_reason_to_string(rwi->reset_reason)); + + /* read the state and check (again) after getting rtnl */ + reset_state = adapter->state; + + if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { + rc = -EBUSY; + goto out; + } + + netif_carrier_off(netdev); + adapter->reset_reason = rwi->reset_reason; + + ibmvnic_cleanup(netdev); + release_resources(adapter); + release_sub_crqs(adapter, 0); + release_crq_queue(adapter); + + /* remove the closed state so when we call open it appears + * we are coming from the probed state. + */ + adapter->state = VNIC_PROBED; + + reinit_init_done(adapter); + + rc = init_crq_queue(adapter); + if (rc) { + netdev_err(adapter->netdev, + "Couldn't initialize crq. rc=%d\n", rc); + goto out; + } + + rc = ibmvnic_reset_init(adapter, false); + if (rc) + goto out; + + /* If the adapter was in PROBE or DOWN state prior to the reset, + * exit here. + */ + if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) + goto out; + + rc = ibmvnic_login(netdev); + if (rc) + goto out; + + rc = init_resources(adapter); + if (rc) + goto out; + + ibmvnic_disable_irqs(adapter); + adapter->state = VNIC_CLOSED; + + if (reset_state == VNIC_CLOSED) + goto out; + + rc = __ibmvnic_open(netdev); + if (rc) { + rc = IBMVNIC_OPEN_FAILED; + goto out; + } + + __netdev_notify_peers(netdev); +out: + /* restore adapter state if reset failed */ + if (rc) + adapter->state = reset_state; + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Hard reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); + return rc; +} + +static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_rwi *rwi; + unsigned long flags; + + spin_lock_irqsave(&adapter->rwi_lock, flags); + + if (!list_empty(&adapter->rwi_list)) { + rwi = list_first_entry(&adapter->rwi_list, struct ibmvnic_rwi, + list); + list_del(&rwi->list); + } else { + rwi = NULL; + } + + 
spin_unlock_irqrestore(&adapter->rwi_lock, flags); + return rwi; +} + +/** + * do_passive_init - complete probing when partner device is detected. + * @adapter: ibmvnic_adapter struct + * + * If the ibmvnic device does not have a partner device to communicate with at boot + * and that partner device comes online at a later time, this function is called + * to complete the initialization process of ibmvnic device. + * Caller is expected to hold rtnl_lock(). + * + * Returns non-zero if sub-CRQs are not initialized properly leaving the device + * in the down state. + * Returns 0 upon success and the device is in PROBED state. + */ + +static int do_passive_init(struct ibmvnic_adapter *adapter) +{ + unsigned long timeout = msecs_to_jiffies(30000); + struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->vdev->dev; + int rc; + + netdev_dbg(netdev, "Partner device found, probing.\n"); + + adapter->state = VNIC_PROBING; + reinit_completion(&adapter->init_done); + adapter->init_done_rc = 0; + adapter->crq.active = true; + + rc = send_crq_init_complete(adapter); + if (rc) + goto out; + + rc = send_version_xchg(adapter); + if (rc) + netdev_dbg(adapter->netdev, "send_version_xchg failed, rc=%d\n", rc); + + if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { + dev_err(dev, "Initialization sequence timed out\n"); + rc = -ETIMEDOUT; + goto out; + } + + rc = init_sub_crqs(adapter); + if (rc) { + dev_err(dev, "Initialization of sub crqs failed, rc=%d\n", rc); + goto out; + } + + rc = init_sub_crq_irqs(adapter); + if (rc) { + dev_err(dev, "Failed to initialize sub crq irqs\n, rc=%d", rc); + goto init_failed; + } + + netdev->mtu = adapter->req_mtu - ETH_HLEN; + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; + + adapter->state = VNIC_PROBED; + netdev_dbg(netdev, "Probed successfully. 
Waiting for signal from partner device.\n"); + + return 0; + +init_failed: + release_sub_crqs(adapter, 1); +out: + adapter->state = VNIC_DOWN; + return rc; +} + +static void __ibmvnic_reset(struct work_struct *work) +{ + struct ibmvnic_adapter *adapter; + unsigned int timeout = 5000; + struct ibmvnic_rwi *tmprwi; + bool saved_state = false; + struct ibmvnic_rwi *rwi; + unsigned long flags; + struct device *dev; + bool need_reset; + int num_fails = 0; + u32 reset_state; + int rc = 0; + + adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset); + dev = &adapter->vdev->dev; + + /* Wait for ibmvnic_probe() to complete. If probe is taking too long + * or if another reset is in progress, defer work for now. If probe + * eventually fails it will flush and terminate our work. + * + * Three possibilities here: + * 1. Adpater being removed - just return + * 2. Timed out on probe or another reset in progress - delay the work + * 3. Completed probe - perform any resets in queue + */ + if (adapter->state == VNIC_PROBING && + !wait_for_completion_timeout(&adapter->probe_done, timeout)) { + dev_err(dev, "Reset thread timed out on probe"); + queue_delayed_work(system_long_wq, + &adapter->ibmvnic_delayed_reset, + IBMVNIC_RESET_DELAY); + return; + } + + /* adapter is done with probe (i.e state is never VNIC_PROBING now) */ + if (adapter->state == VNIC_REMOVING) + return; + + /* ->rwi_list is stable now (no one else is removing entries) */ + + /* ibmvnic_probe() may have purged the reset queue after we were + * scheduled to process a reset so there maybe no resets to process. + * Before setting the ->resetting bit though, we have to make sure + * that there is infact a reset to process. 
Otherwise we may race + * with ibmvnic_open() and end up leaving the vnic down: + * + * __ibmvnic_reset() ibmvnic_open() + * ----------------- -------------- + * + * set ->resetting bit + * find ->resetting bit is set + * set ->state to IBMVNIC_OPEN (i.e + * assume reset will open device) + * return + * find reset queue empty + * return + * + * Neither performed vnic login/open and vnic stays down + * + * If we hold the lock and conditionally set the bit, either we + * or ibmvnic_open() will complete the open. + */ + need_reset = false; + spin_lock(&adapter->rwi_lock); + if (!list_empty(&adapter->rwi_list)) { + if (test_and_set_bit_lock(0, &adapter->resetting)) { + queue_delayed_work(system_long_wq, + &adapter->ibmvnic_delayed_reset, + IBMVNIC_RESET_DELAY); + } else { + need_reset = true; + } + } + spin_unlock(&adapter->rwi_lock); + + if (!need_reset) + return; + + rwi = get_next_rwi(adapter); + while (rwi) { + spin_lock_irqsave(&adapter->state_lock, flags); + + if (adapter->state == VNIC_REMOVING || + adapter->state == VNIC_REMOVED) { + spin_unlock_irqrestore(&adapter->state_lock, flags); + kfree(rwi); + rc = EBUSY; + break; + } + + if (!saved_state) { + reset_state = adapter->state; + saved_state = true; + } + spin_unlock_irqrestore(&adapter->state_lock, flags); + + if (rwi->reset_reason == VNIC_RESET_PASSIVE_INIT) { + rtnl_lock(); + rc = do_passive_init(adapter); + rtnl_unlock(); + if (!rc) + netif_carrier_on(adapter->netdev); + } else if (adapter->force_reset_recovery) { + /* Since we are doing a hard reset now, clear the + * failover_pending flag so we don't ignore any + * future MOBILITY or other resets. 
+ */ + adapter->failover_pending = false; + + /* Transport event occurred during previous reset */ + if (adapter->wait_for_reset) { + /* Previous was CHANGE_PARAM; caller locked */ + adapter->force_reset_recovery = false; + rc = do_hard_reset(adapter, rwi, reset_state); + } else { + rtnl_lock(); + adapter->force_reset_recovery = false; + rc = do_hard_reset(adapter, rwi, reset_state); + rtnl_unlock(); + } + if (rc) + num_fails++; + else + num_fails = 0; + + /* If auto-priority-failover is enabled we can get + * back to back failovers during resets, resulting + * in at least two failed resets (from high-priority + * backing device to low-priority one and then back) + * If resets continue to fail beyond that, give the + * adapter some time to settle down before retrying. + */ + if (num_fails >= 3) { + netdev_dbg(adapter->netdev, + "[S:%s] Hard reset failed %d times, waiting 60 secs\n", + adapter_state_to_string(adapter->state), + num_fails); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(60 * HZ); + } + } else { + rc = do_reset(adapter, rwi, reset_state); + } + tmprwi = rwi; + adapter->last_reset_time = jiffies; + + if (rc) + netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc); + + rwi = get_next_rwi(adapter); + + /* + * If there are no resets queued and the previous reset failed, + * the adapter would be in an undefined state. So retry the + * previous reset as a hard reset. + * + * Else, free the previous rwi and, if there is another reset + * queued, process the new reset even if previous reset failed + * (the previous reset could have failed because of a fail + * over for instance, so process the fail over). 
+ */ + if (!rwi && rc) + rwi = tmprwi; + else + kfree(tmprwi); + + if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER || + rwi->reset_reason == VNIC_RESET_MOBILITY || rc)) + adapter->force_reset_recovery = true; + } + + if (adapter->wait_for_reset) { + adapter->reset_done_rc = rc; + complete(&adapter->reset_done); + } + + clear_bit_unlock(0, &adapter->resetting); + + netdev_dbg(adapter->netdev, + "[S:%s FRR:%d WFR:%d] Done processing resets\n", + adapter_state_to_string(adapter->state), + adapter->force_reset_recovery, + adapter->wait_for_reset); +} + +static void __ibmvnic_delayed_reset(struct work_struct *work) +{ + struct ibmvnic_adapter *adapter; + + adapter = container_of(work, struct ibmvnic_adapter, + ibmvnic_delayed_reset.work); + __ibmvnic_reset(&adapter->ibmvnic_reset); +} + +static void flush_reset_queue(struct ibmvnic_adapter *adapter) +{ + struct list_head *entry, *tmp_entry; + + if (!list_empty(&adapter->rwi_list)) { + list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) { + list_del(entry); + kfree(list_entry(entry, struct ibmvnic_rwi, list)); + } + } +} + +static int ibmvnic_reset(struct ibmvnic_adapter *adapter, + enum ibmvnic_reset_reason reason) +{ + struct net_device *netdev = adapter->netdev; + struct ibmvnic_rwi *rwi, *tmp; + unsigned long flags; + int ret; + + spin_lock_irqsave(&adapter->rwi_lock, flags); + + /* If failover is pending don't schedule any other reset. + * Instead let the failover complete. If there is already a + * a failover reset scheduled, we will detect and drop the + * duplicate reset when walking the ->rwi_list below. 
+ */ + if (adapter->state == VNIC_REMOVING || + adapter->state == VNIC_REMOVED || + (adapter->failover_pending && reason != VNIC_RESET_FAILOVER)) { + ret = EBUSY; + netdev_dbg(netdev, "Adapter removing or pending failover, skipping reset\n"); + goto err; + } + + list_for_each_entry(tmp, &adapter->rwi_list, list) { + if (tmp->reset_reason == reason) { + netdev_dbg(netdev, "Skipping matching reset, reason=%s\n", + reset_reason_to_string(reason)); + ret = EBUSY; + goto err; + } + } + + rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC); + if (!rwi) { + ret = ENOMEM; + goto err; + } + /* if we just received a transport event, + * flush reset queue and process this reset + */ + if (adapter->force_reset_recovery) + flush_reset_queue(adapter); + + rwi->reset_reason = reason; + list_add_tail(&rwi->list, &adapter->rwi_list); + netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n", + reset_reason_to_string(reason)); + queue_work(system_long_wq, &adapter->ibmvnic_reset); + + ret = 0; +err: + /* ibmvnic_close() below can block, so drop the lock first */ + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + + if (ret == ENOMEM) + ibmvnic_close(netdev); + + return -ret; +} + +static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct ibmvnic_adapter *adapter = netdev_priv(dev); + + if (test_bit(0, &adapter->resetting)) { + netdev_err(adapter->netdev, + "Adapter is resetting, skip timeout reset\n"); + return; + } + /* No queuing up reset until at least 5 seconds (default watchdog val) + * after last reset + */ + if (time_before(jiffies, (adapter->last_reset_time + dev->watchdog_timeo))) { + netdev_dbg(dev, "Not yet time to tx timeout.\n"); + return; + } + ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT); +} + +static void remove_buff_from_pool(struct ibmvnic_adapter *adapter, + struct ibmvnic_rx_buff *rx_buff) +{ + struct ibmvnic_rx_pool *pool = &adapter->rx_pool[rx_buff->pool_index]; + + rx_buff->skb = NULL; + + pool->free_map[pool->next_alloc] = 
(int)(rx_buff - pool->rx_buff); + pool->next_alloc = (pool->next_alloc + 1) % pool->size; + + atomic_dec(&pool->available); +} + +static int ibmvnic_poll(struct napi_struct *napi, int budget) +{ + struct ibmvnic_sub_crq_queue *rx_scrq; + struct ibmvnic_adapter *adapter; + struct net_device *netdev; + int frames_processed; + int scrq_num; + + netdev = napi->dev; + adapter = netdev_priv(netdev); + scrq_num = (int)(napi - adapter->napi); + frames_processed = 0; + rx_scrq = adapter->rx_scrq[scrq_num]; + +restart_poll: + while (frames_processed < budget) { + struct sk_buff *skb; + struct ibmvnic_rx_buff *rx_buff; + union sub_crq *next; + u32 length; + u16 offset; + u8 flags = 0; + + if (unlikely(test_bit(0, &adapter->resetting) && + adapter->reset_reason != VNIC_RESET_NON_FATAL)) { + enable_scrq_irq(adapter, rx_scrq); + napi_complete_done(napi, frames_processed); + return frames_processed; + } + + if (!pending_scrq(adapter, rx_scrq)) + break; + next = ibmvnic_next_scrq(adapter, rx_scrq); + rx_buff = (struct ibmvnic_rx_buff *) + be64_to_cpu(next->rx_comp.correlator); + /* do error checking */ + if (next->rx_comp.rc) { + netdev_dbg(netdev, "rx buffer returned with rc %x\n", + be16_to_cpu(next->rx_comp.rc)); + /* free the entry */ + next->rx_comp.first = 0; + dev_kfree_skb_any(rx_buff->skb); + remove_buff_from_pool(adapter, rx_buff); + continue; + } else if (!rx_buff->skb) { + /* free the entry */ + next->rx_comp.first = 0; + remove_buff_from_pool(adapter, rx_buff); + continue; + } + + length = be32_to_cpu(next->rx_comp.len); + offset = be16_to_cpu(next->rx_comp.off_frame_data); + flags = next->rx_comp.flags; + skb = rx_buff->skb; + /* load long_term_buff before copying to skb */ + dma_rmb(); + skb_copy_to_linear_data(skb, rx_buff->data + offset, + length); + + /* VLAN Header has been stripped by the system firmware and + * needs to be inserted by the driver + */ + if (adapter->rx_vlan_header_insertion && + (flags & IBMVNIC_VLAN_STRIPPED)) + __vlan_hwaccel_put_tag(skb, 
htons(ETH_P_8021Q), + ntohs(next->rx_comp.vlan_tci)); + + /* free the entry */ + next->rx_comp.first = 0; + remove_buff_from_pool(adapter, rx_buff); + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, netdev); + skb_record_rx_queue(skb, scrq_num); + + if (flags & IBMVNIC_IP_CHKSUM_GOOD && + flags & IBMVNIC_TCP_UDP_CHKSUM_GOOD) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + length = skb->len; + napi_gro_receive(napi, skb); /* send it up */ + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += length; + adapter->rx_stats_buffers[scrq_num].packets++; + adapter->rx_stats_buffers[scrq_num].bytes += length; + frames_processed++; + } + + if (adapter->state != VNIC_CLOSING && + ((atomic_read(&adapter->rx_pool[scrq_num].available) < + adapter->req_rx_add_entries_per_subcrq / 2) || + frames_processed < budget)) + replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]); + if (frames_processed < budget) { + if (napi_complete_done(napi, frames_processed)) { + enable_scrq_irq(adapter, rx_scrq); + if (pending_scrq(adapter, rx_scrq)) { + if (napi_reschedule(napi)) { + disable_scrq_irq(adapter, rx_scrq); + goto restart_poll; + } + } + } + } + return frames_processed; +} + +static int wait_for_reset(struct ibmvnic_adapter *adapter) +{ + int rc, ret; + + adapter->fallback.mtu = adapter->req_mtu; + adapter->fallback.rx_queues = adapter->req_rx_queues; + adapter->fallback.tx_queues = adapter->req_tx_queues; + adapter->fallback.rx_entries = adapter->req_rx_add_entries_per_subcrq; + adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq; + + reinit_completion(&adapter->reset_done); + adapter->wait_for_reset = true; + rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM); + + if (rc) { + ret = rc; + goto out; + } + rc = ibmvnic_wait_for_completion(adapter, &adapter->reset_done, 60000); + if (rc) { + ret = -ENODEV; + goto out; + } + + ret = 0; + if (adapter->reset_done_rc) { + ret = -EIO; + adapter->desired.mtu = adapter->fallback.mtu; + 
adapter->desired.rx_queues = adapter->fallback.rx_queues; + adapter->desired.tx_queues = adapter->fallback.tx_queues; + adapter->desired.rx_entries = adapter->fallback.rx_entries; + adapter->desired.tx_entries = adapter->fallback.tx_entries; + + reinit_completion(&adapter->reset_done); + adapter->wait_for_reset = true; + rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM); + if (rc) { + ret = rc; + goto out; + } + rc = ibmvnic_wait_for_completion(adapter, &adapter->reset_done, + 60000); + if (rc) { + ret = -ENODEV; + goto out; + } + } +out: + adapter->wait_for_reset = false; + + return ret; +} + +static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + adapter->desired.mtu = new_mtu + ETH_HLEN; + + return wait_for_reset(adapter); +} + +static netdev_features_t ibmvnic_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + /* Some backing hardware adapters can not + * handle packets with a MSS less than 224 + * or with only one segment. 
+ */ + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_size < 224 || + skb_shinfo(skb)->gso_segs == 1) + features &= ~NETIF_F_GSO_MASK; + } + + return features; +} + +static const struct net_device_ops ibmvnic_netdev_ops = { + .ndo_open = ibmvnic_open, + .ndo_stop = ibmvnic_close, + .ndo_start_xmit = ibmvnic_xmit, + .ndo_set_rx_mode = ibmvnic_set_multi, + .ndo_set_mac_address = ibmvnic_set_mac, + .ndo_validate_addr = eth_validate_addr, + .ndo_tx_timeout = ibmvnic_tx_timeout, + .ndo_change_mtu = ibmvnic_change_mtu, + .ndo_features_check = ibmvnic_features_check, +}; + +/* ethtool functions */ + +static int ibmvnic_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int rc; + + rc = send_query_phys_parms(adapter); + if (rc) { + adapter->speed = SPEED_UNKNOWN; + adapter->duplex = DUPLEX_UNKNOWN; + } + cmd->base.speed = adapter->speed; + cmd->base.duplex = adapter->duplex; + cmd->base.port = PORT_FIBRE; + cmd->base.phy_address = 0; + cmd->base.autoneg = AUTONEG_ENABLE; + + return 0; +} + +static void ibmvnic_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + strscpy(info->driver, ibmvnic_driver_name, sizeof(info->driver)); + strscpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version)); + strscpy(info->fw_version, adapter->fw_version, + sizeof(info->fw_version)); +} + +static u32 ibmvnic_get_msglevel(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + return adapter->msg_enable; +} + +static void ibmvnic_set_msglevel(struct net_device *netdev, u32 data) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + adapter->msg_enable = data; +} + +static u32 ibmvnic_get_link(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + /* Don't need to send a query because we request a logical link up at + * init 
and then we wait for link state indications + */ + return adapter->logical_link_state; +} + +static void ibmvnic_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq; + ring->tx_max_pending = adapter->max_tx_entries_per_subcrq; + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_pending = adapter->req_rx_add_entries_per_subcrq; + ring->tx_pending = adapter->req_tx_entries_per_subcrq; + ring->rx_mini_pending = 0; + ring->rx_jumbo_pending = 0; +} + +static int ibmvnic_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq || + ring->tx_pending > adapter->max_tx_entries_per_subcrq) { + netdev_err(netdev, "Invalid request.\n"); + netdev_err(netdev, "Max tx buffers = %llu\n", + adapter->max_rx_add_entries_per_subcrq); + netdev_err(netdev, "Max rx buffers = %llu\n", + adapter->max_tx_entries_per_subcrq); + return -EINVAL; + } + + adapter->desired.rx_entries = ring->rx_pending; + adapter->desired.tx_entries = ring->tx_pending; + + return wait_for_reset(adapter); +} + +static void ibmvnic_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + channels->max_rx = adapter->max_rx_queues; + channels->max_tx = adapter->max_tx_queues; + channels->max_other = 0; + channels->max_combined = 0; + channels->rx_count = adapter->req_rx_queues; + channels->tx_count = adapter->req_tx_queues; + channels->other_count = 0; + channels->combined_count = 0; +} + +static int ibmvnic_set_channels(struct net_device *netdev, + struct 
ethtool_channels *channels) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + adapter->desired.rx_queues = channels->rx_count; + adapter->desired.tx_queues = channels->tx_count; + + return wait_for_reset(adapter); +} + +static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct ibmvnic_adapter *adapter = netdev_priv(dev); + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN) + memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); + + for (i = 0; i < adapter->req_tx_queues; i++) { + snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); + data += ETH_GSTRING_LEN; + + snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); + data += ETH_GSTRING_LEN; + + snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i); + data += ETH_GSTRING_LEN; + } + + for (i = 0; i < adapter->req_rx_queues; i++) { + snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i); + data += ETH_GSTRING_LEN; + + snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i); + data += ETH_GSTRING_LEN; + + snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); + data += ETH_GSTRING_LEN; + } +} + +static int ibmvnic_get_sset_count(struct net_device *dev, int sset) +{ + struct ibmvnic_adapter *adapter = netdev_priv(dev); + + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ibmvnic_stats) + + adapter->req_tx_queues * NUM_TX_STATS + + adapter->req_rx_queues * NUM_RX_STATS; + default: + return -EOPNOTSUPP; + } +} + +static void ibmvnic_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct ibmvnic_adapter *adapter = netdev_priv(dev); + union ibmvnic_crq crq; + int i, j; + int rc; + + memset(&crq, 0, sizeof(crq)); + crq.request_statistics.first = IBMVNIC_CRQ_CMD; + crq.request_statistics.cmd = REQUEST_STATISTICS; + crq.request_statistics.ioba = cpu_to_be32(adapter->stats_token); + crq.request_statistics.len = + cpu_to_be32(sizeof(struct ibmvnic_statistics)); + + 
/* Wait for data to be written */ + reinit_completion(&adapter->stats_done); + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) + return; + rc = ibmvnic_wait_for_completion(adapter, &adapter->stats_done, 10000); + if (rc) + return; + + for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++) + data[i] = be64_to_cpu(IBMVNIC_GET_STAT + (adapter, ibmvnic_stats[i].offset)); + + for (j = 0; j < adapter->req_tx_queues; j++) { + data[i] = adapter->tx_stats_buffers[j].packets; + i++; + data[i] = adapter->tx_stats_buffers[j].bytes; + i++; + data[i] = adapter->tx_stats_buffers[j].dropped_packets; + i++; + } + + for (j = 0; j < adapter->req_rx_queues; j++) { + data[i] = adapter->rx_stats_buffers[j].packets; + i++; + data[i] = adapter->rx_stats_buffers[j].bytes; + i++; + data[i] = adapter->rx_stats_buffers[j].interrupts; + i++; + } +} + +static const struct ethtool_ops ibmvnic_ethtool_ops = { + .get_drvinfo = ibmvnic_get_drvinfo, + .get_msglevel = ibmvnic_get_msglevel, + .set_msglevel = ibmvnic_set_msglevel, + .get_link = ibmvnic_get_link, + .get_ringparam = ibmvnic_get_ringparam, + .set_ringparam = ibmvnic_set_ringparam, + .get_channels = ibmvnic_get_channels, + .set_channels = ibmvnic_set_channels, + .get_strings = ibmvnic_get_strings, + .get_sset_count = ibmvnic_get_sset_count, + .get_ethtool_stats = ibmvnic_get_ethtool_stats, + .get_link_ksettings = ibmvnic_get_link_ksettings, +}; + +/* Routines for managing CRQs/sCRQs */ + +static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + int rc; + + if (!scrq) { + netdev_dbg(adapter->netdev, "Invalid scrq reset.\n"); + return -EINVAL; + } + + if (scrq->irq) { + free_irq(scrq->irq, scrq); + irq_dispose_mapping(scrq->irq); + scrq->irq = 0; + } + + if (scrq->msgs) { + memset(scrq->msgs, 0, 4 * PAGE_SIZE); + atomic_set(&scrq->used, 0); + scrq->cur = 0; + scrq->ind_buf.index = 0; + } else { + netdev_dbg(adapter->netdev, "Invalid scrq reset\n"); + return -EINVAL; + } + + rc = 
h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token, + 4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq); + return rc; +} + +static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter) +{ + int i, rc; + + if (!adapter->tx_scrq || !adapter->rx_scrq) + return -EINVAL; + + for (i = 0; i < adapter->req_tx_queues; i++) { + netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i); + rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]); + if (rc) + return rc; + } + + for (i = 0; i < adapter->req_rx_queues; i++) { + netdev_dbg(adapter->netdev, "Re-setting rx_scrq[%d]\n", i); + rc = reset_one_sub_crq_queue(adapter, adapter->rx_scrq[i]); + if (rc) + return rc; + } + + return rc; +} + +static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq, + bool do_h_free) +{ + struct device *dev = &adapter->vdev->dev; + long rc; + + netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n"); + + if (do_h_free) { + /* Close the sub-crqs */ + do { + rc = plpar_hcall_norets(H_FREE_SUB_CRQ, + adapter->vdev->unit_address, + scrq->crq_num); + } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); + + if (rc) { + netdev_err(adapter->netdev, + "Failed to release sub-CRQ %16lx, rc = %ld\n", + scrq->crq_num, rc); + } + } + + dma_free_coherent(dev, + IBMVNIC_IND_ARR_SZ, + scrq->ind_buf.indir_arr, + scrq->ind_buf.indir_dma); + + dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, + DMA_BIDIRECTIONAL); + free_pages((unsigned long)scrq->msgs, 2); + kfree(scrq); +} + +static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter + *adapter) +{ + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_sub_crq_queue *scrq; + int rc; + + scrq = kzalloc(sizeof(*scrq), GFP_KERNEL); + if (!scrq) + return NULL; + + scrq->msgs = + (union sub_crq *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 2); + if (!scrq->msgs) { + dev_warn(dev, "Couldn't allocate crq queue messages page\n"); + goto zero_page_failed; + } + + scrq->msg_token = 
dma_map_single(dev, scrq->msgs, 4 * PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, scrq->msg_token)) { + dev_warn(dev, "Couldn't map crq queue messages page\n"); + goto map_failed; + } + + rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token, + 4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq); + + if (rc == H_RESOURCE) + rc = ibmvnic_reset_crq(adapter); + + if (rc == H_CLOSED) { + dev_warn(dev, "Partner adapter not ready, waiting.\n"); + } else if (rc) { + dev_warn(dev, "Error %d registering sub-crq\n", rc); + goto reg_failed; + } + + scrq->adapter = adapter; + scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs); + scrq->ind_buf.index = 0; + + scrq->ind_buf.indir_arr = + dma_alloc_coherent(dev, + IBMVNIC_IND_ARR_SZ, + &scrq->ind_buf.indir_dma, + GFP_KERNEL); + + if (!scrq->ind_buf.indir_arr) + goto indir_failed; + + spin_lock_init(&scrq->lock); + + netdev_dbg(adapter->netdev, + "sub-crq initialized, num %lx, hw_irq=%lx, irq=%x\n", + scrq->crq_num, scrq->hw_irq, scrq->irq); + + return scrq; + +indir_failed: + do { + rc = plpar_hcall_norets(H_FREE_SUB_CRQ, + adapter->vdev->unit_address, + scrq->crq_num); + } while (rc == H_BUSY || rc == H_IS_LONG_BUSY(rc)); +reg_failed: + dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, + DMA_BIDIRECTIONAL); +map_failed: + free_pages((unsigned long)scrq->msgs, 2); +zero_page_failed: + kfree(scrq); + + return NULL; +} + +static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free) +{ + int i; + + if (adapter->tx_scrq) { + for (i = 0; i < adapter->num_active_tx_scrqs; i++) { + if (!adapter->tx_scrq[i]) + continue; + + netdev_dbg(adapter->netdev, "Releasing tx_scrq[%d]\n", + i); + ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]); + if (adapter->tx_scrq[i]->irq) { + free_irq(adapter->tx_scrq[i]->irq, + adapter->tx_scrq[i]); + irq_dispose_mapping(adapter->tx_scrq[i]->irq); + adapter->tx_scrq[i]->irq = 0; + } + + release_sub_crq_queue(adapter, adapter->tx_scrq[i], + do_h_free); + } + + 
kfree(adapter->tx_scrq); + adapter->tx_scrq = NULL; + adapter->num_active_tx_scrqs = 0; + } + + if (adapter->rx_scrq) { + for (i = 0; i < adapter->num_active_rx_scrqs; i++) { + if (!adapter->rx_scrq[i]) + continue; + + netdev_dbg(adapter->netdev, "Releasing rx_scrq[%d]\n", + i); + if (adapter->rx_scrq[i]->irq) { + free_irq(adapter->rx_scrq[i]->irq, + adapter->rx_scrq[i]); + irq_dispose_mapping(adapter->rx_scrq[i]->irq); + adapter->rx_scrq[i]->irq = 0; + } + + release_sub_crq_queue(adapter, adapter->rx_scrq[i], + do_h_free); + } + + kfree(adapter->rx_scrq); + adapter->rx_scrq = NULL; + adapter->num_active_rx_scrqs = 0; + } +} + +static int disable_scrq_irq(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + struct device *dev = &adapter->vdev->dev; + unsigned long rc; + + rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, + H_DISABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0); + if (rc) + dev_err(dev, "Couldn't disable scrq irq 0x%lx. rc=%ld\n", + scrq->hw_irq, rc); + return rc; +} + +/* We can not use the IRQ chip EOI handler because that has the + * unintended effect of changing the interrupt priority. + */ +static void ibmvnic_xics_eoi(struct device *dev, struct ibmvnic_sub_crq_queue *scrq) +{ + u64 val = 0xff000000 | scrq->hw_irq; + unsigned long rc; + + rc = plpar_hcall_norets(H_EOI, val); + if (rc) + dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", val, rc); +} + +/* Due to a firmware bug, the hypervisor can send an interrupt to a + * transmit or receive queue just prior to a partition migration. + * Force an EOI after migration. 
+ */ +static void ibmvnic_clear_pending_interrupt(struct device *dev, + struct ibmvnic_sub_crq_queue *scrq) +{ + if (!xive_enabled()) + ibmvnic_xics_eoi(dev, scrq); +} + +static int enable_scrq_irq(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + struct device *dev = &adapter->vdev->dev; + unsigned long rc; + + if (scrq->hw_irq > 0x100000000ULL) { + dev_err(dev, "bad hw_irq = %lx\n", scrq->hw_irq); + return 1; + } + + if (test_bit(0, &adapter->resetting) && + adapter->reset_reason == VNIC_RESET_MOBILITY) { + ibmvnic_clear_pending_interrupt(dev, scrq); + } + + rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, + H_ENABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0); + if (rc) + dev_err(dev, "Couldn't enable scrq irq 0x%lx. rc=%ld\n", + scrq->hw_irq, rc); + return rc; +} + +static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_tx_pool *tx_pool; + struct ibmvnic_tx_buff *txbuff; + struct netdev_queue *txq; + union sub_crq *next; + int index; + int i; + +restart_loop: + while (pending_scrq(adapter, scrq)) { + unsigned int pool = scrq->pool_index; + int num_entries = 0; + int total_bytes = 0; + int num_packets = 0; + + next = ibmvnic_next_scrq(adapter, scrq); + for (i = 0; i < next->tx_comp.num_comps; i++) { + index = be32_to_cpu(next->tx_comp.correlators[i]); + if (index & IBMVNIC_TSO_POOL_MASK) { + tx_pool = &adapter->tso_pool[pool]; + index &= ~IBMVNIC_TSO_POOL_MASK; + } else { + tx_pool = &adapter->tx_pool[pool]; + } + + txbuff = &tx_pool->tx_buff[index]; + num_packets++; + num_entries += txbuff->num_entries; + if (txbuff->skb) { + total_bytes += txbuff->skb->len; + if (next->tx_comp.rcs[i]) { + dev_err(dev, "tx error %x\n", + next->tx_comp.rcs[i]); + dev_kfree_skb_irq(txbuff->skb); + } else { + dev_consume_skb_irq(txbuff->skb); + } + txbuff->skb = NULL; + } else { + netdev_warn(adapter->netdev, + "TX completion received with 
NULL socket buffer\n"); + } + tx_pool->free_map[tx_pool->producer_index] = index; + tx_pool->producer_index = + (tx_pool->producer_index + 1) % + tx_pool->num_buffers; + } + /* remove tx_comp scrq*/ + next->tx_comp.first = 0; + + txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index); + netdev_tx_completed_queue(txq, num_packets, total_bytes); + + if (atomic_sub_return(num_entries, &scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + __netif_subqueue_stopped(adapter->netdev, + scrq->pool_index)) { + rcu_read_lock(); + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + rcu_read_unlock(); + } + } + + enable_scrq_irq(adapter, scrq); + + if (pending_scrq(adapter, scrq)) { + disable_scrq_irq(adapter, scrq); + goto restart_loop; + } + + return 0; +} + +static irqreturn_t ibmvnic_interrupt_tx(int irq, void *instance) +{ + struct ibmvnic_sub_crq_queue *scrq = instance; + struct ibmvnic_adapter *adapter = scrq->adapter; + + disable_scrq_irq(adapter, scrq); + ibmvnic_complete_tx(adapter, scrq); + + return IRQ_HANDLED; +} + +static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance) +{ + struct ibmvnic_sub_crq_queue *scrq = instance; + struct ibmvnic_adapter *adapter = scrq->adapter; + + /* When booting a kdump kernel we can hit pending interrupts + * prior to completing driver initialization. 
+ */ + if (unlikely(adapter->state != VNIC_OPEN)) + return IRQ_NONE; + + adapter->rx_stats_buffers[scrq->scrq_num].interrupts++; + + if (napi_schedule_prep(&adapter->napi[scrq->scrq_num])) { + disable_scrq_irq(adapter, scrq); + __napi_schedule(&adapter->napi[scrq->scrq_num]); + } + + return IRQ_HANDLED; +} + +static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_sub_crq_queue *scrq; + int i = 0, j = 0; + int rc = 0; + + for (i = 0; i < adapter->req_tx_queues; i++) { + netdev_dbg(adapter->netdev, "Initializing tx_scrq[%d] irq\n", + i); + scrq = adapter->tx_scrq[i]; + scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); + + if (!scrq->irq) { + rc = -EINVAL; + dev_err(dev, "Error mapping irq\n"); + goto req_tx_irq_failed; + } + + snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-tx%d", + adapter->vdev->unit_address, i); + rc = request_irq(scrq->irq, ibmvnic_interrupt_tx, + 0, scrq->name, scrq); + + if (rc) { + dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n", + scrq->irq, rc); + irq_dispose_mapping(scrq->irq); + goto req_tx_irq_failed; + } + } + + for (i = 0; i < adapter->req_rx_queues; i++) { + netdev_dbg(adapter->netdev, "Initializing rx_scrq[%d] irq\n", + i); + scrq = adapter->rx_scrq[i]; + scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); + if (!scrq->irq) { + rc = -EINVAL; + dev_err(dev, "Error mapping irq\n"); + goto req_rx_irq_failed; + } + snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-rx%d", + adapter->vdev->unit_address, i); + rc = request_irq(scrq->irq, ibmvnic_interrupt_rx, + 0, scrq->name, scrq); + if (rc) { + dev_err(dev, "Couldn't register rx irq 0x%x. 
rc=%d\n", + scrq->irq, rc); + irq_dispose_mapping(scrq->irq); + goto req_rx_irq_failed; + } + } + return rc; + +req_rx_irq_failed: + for (j = 0; j < i; j++) { + free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]); + irq_dispose_mapping(adapter->rx_scrq[j]->irq); + } + i = adapter->req_tx_queues; +req_tx_irq_failed: + for (j = 0; j < i; j++) { + free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]); + irq_dispose_mapping(adapter->tx_scrq[j]->irq); + } + release_sub_crqs(adapter, 1); + return rc; +} + +static int init_sub_crqs(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_sub_crq_queue **allqueues; + int registered_queues = 0; + int total_queues; + int more = 0; + int i; + + total_queues = adapter->req_tx_queues + adapter->req_rx_queues; + + allqueues = kcalloc(total_queues, sizeof(*allqueues), GFP_KERNEL); + if (!allqueues) + return -ENOMEM; + + for (i = 0; i < total_queues; i++) { + allqueues[i] = init_sub_crq_queue(adapter); + if (!allqueues[i]) { + dev_warn(dev, "Couldn't allocate all sub-crqs\n"); + break; + } + registered_queues++; + } + + /* Make sure we were able to register the minimum number of queues */ + if (registered_queues < + adapter->min_tx_queues + adapter->min_rx_queues) { + dev_err(dev, "Fatal: Couldn't init min number of sub-crqs\n"); + goto tx_failed; + } + + /* Distribute the failed allocated queues*/ + for (i = 0; i < total_queues - registered_queues + more ; i++) { + netdev_dbg(adapter->netdev, "Reducing number of queues\n"); + switch (i % 3) { + case 0: + if (adapter->req_rx_queues > adapter->min_rx_queues) + adapter->req_rx_queues--; + else + more++; + break; + case 1: + if (adapter->req_tx_queues > adapter->min_tx_queues) + adapter->req_tx_queues--; + else + more++; + break; + } + } + + adapter->tx_scrq = kcalloc(adapter->req_tx_queues, + sizeof(*adapter->tx_scrq), GFP_KERNEL); + if (!adapter->tx_scrq) + goto tx_failed; + + for (i = 0; i < adapter->req_tx_queues; i++) { + 
adapter->tx_scrq[i] = allqueues[i]; + adapter->tx_scrq[i]->pool_index = i; + adapter->num_active_tx_scrqs++; + } + + adapter->rx_scrq = kcalloc(adapter->req_rx_queues, + sizeof(*adapter->rx_scrq), GFP_KERNEL); + if (!adapter->rx_scrq) + goto rx_failed; + + for (i = 0; i < adapter->req_rx_queues; i++) { + adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues]; + adapter->rx_scrq[i]->scrq_num = i; + adapter->num_active_rx_scrqs++; + } + + kfree(allqueues); + return 0; + +rx_failed: + kfree(adapter->tx_scrq); + adapter->tx_scrq = NULL; +tx_failed: + for (i = 0; i < registered_queues; i++) + release_sub_crq_queue(adapter, allqueues[i], 1); + kfree(allqueues); + return -ENOMEM; +} + +static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) +{ + struct device *dev = &adapter->vdev->dev; + union ibmvnic_crq crq; + int max_entries; + int cap_reqs; + + /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on + * the PROMISC flag). Initialize this count upfront. When the tasklet + * receives a response to all of these, it will send the next protocol + * message (QUERY_IP_OFFLOAD). 
+ */ + if (!(adapter->netdev->flags & IFF_PROMISC) || + adapter->promisc_supported) + cap_reqs = 7; + else + cap_reqs = 6; + + if (!retry) { + /* Sub-CRQ entries are 32 byte long */ + int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4); + + atomic_set(&adapter->running_cap_crqs, cap_reqs); + + if (adapter->min_tx_entries_per_subcrq > entries_page || + adapter->min_rx_add_entries_per_subcrq > entries_page) { + dev_err(dev, "Fatal, invalid entries per sub-crq\n"); + return; + } + + if (adapter->desired.mtu) + adapter->req_mtu = adapter->desired.mtu; + else + adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN; + + if (!adapter->desired.tx_entries) + adapter->desired.tx_entries = + adapter->max_tx_entries_per_subcrq; + if (!adapter->desired.rx_entries) + adapter->desired.rx_entries = + adapter->max_rx_add_entries_per_subcrq; + + max_entries = IBMVNIC_LTB_SET_SIZE / + (adapter->req_mtu + IBMVNIC_BUFFER_HLEN); + + if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * + adapter->desired.tx_entries > IBMVNIC_LTB_SET_SIZE) { + adapter->desired.tx_entries = max_entries; + } + + if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * + adapter->desired.rx_entries > IBMVNIC_LTB_SET_SIZE) { + adapter->desired.rx_entries = max_entries; + } + + if (adapter->desired.tx_entries) + adapter->req_tx_entries_per_subcrq = + adapter->desired.tx_entries; + else + adapter->req_tx_entries_per_subcrq = + adapter->max_tx_entries_per_subcrq; + + if (adapter->desired.rx_entries) + adapter->req_rx_add_entries_per_subcrq = + adapter->desired.rx_entries; + else + adapter->req_rx_add_entries_per_subcrq = + adapter->max_rx_add_entries_per_subcrq; + + if (adapter->desired.tx_queues) + adapter->req_tx_queues = + adapter->desired.tx_queues; + else + adapter->req_tx_queues = + adapter->opt_tx_comp_sub_queues; + + if (adapter->desired.rx_queues) + adapter->req_rx_queues = + adapter->desired.rx_queues; + else + adapter->req_rx_queues = + adapter->opt_rx_comp_queues; + + adapter->req_rx_add_queues = 
adapter->max_rx_add_queues; + } else { + atomic_add(cap_reqs, &adapter->running_cap_crqs); + } + memset(&crq, 0, sizeof(crq)); + crq.request_capability.first = IBMVNIC_CRQ_CMD; + crq.request_capability.cmd = REQUEST_CAPABILITY; + + crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES); + crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES); + crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES); + crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + crq.request_capability.capability = + cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ); + crq.request_capability.number = + cpu_to_be64(adapter->req_tx_entries_per_subcrq); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + crq.request_capability.capability = + cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ); + crq.request_capability.number = + cpu_to_be64(adapter->req_rx_add_entries_per_subcrq); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + crq.request_capability.capability = cpu_to_be16(REQ_MTU); + crq.request_capability.number = cpu_to_be64(adapter->req_mtu); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + if (adapter->netdev->flags & IFF_PROMISC) { + if (adapter->promisc_supported) { + crq.request_capability.capability = + cpu_to_be16(PROMISC_REQUESTED); + crq.request_capability.number = cpu_to_be64(1); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + } + } else { + crq.request_capability.capability = + cpu_to_be16(PROMISC_REQUESTED); + crq.request_capability.number = cpu_to_be64(0); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + } + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. 
+ */ + WARN_ON(cap_reqs != 0); +} + +static int pending_scrq(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + union sub_crq *entry = &scrq->msgs[scrq->cur]; + int rc; + + rc = !!(entry->generic.first & IBMVNIC_CRQ_CMD_RSP); + + /* Ensure that the SCRQ valid flag is loaded prior to loading the + * contents of the SCRQ descriptor + */ + dma_rmb(); + + return rc; +} + +static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + union sub_crq *entry; + unsigned long flags; + + spin_lock_irqsave(&scrq->lock, flags); + entry = &scrq->msgs[scrq->cur]; + if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP) { + if (++scrq->cur == scrq->size) + scrq->cur = 0; + } else { + entry = NULL; + } + spin_unlock_irqrestore(&scrq->lock, flags); + + /* Ensure that the SCRQ valid flag is loaded prior to loading the + * contents of the SCRQ descriptor + */ + dma_rmb(); + + return entry; +} + +static union ibmvnic_crq *ibmvnic_next_crq(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_crq_queue *queue = &adapter->crq; + union ibmvnic_crq *crq; + + crq = &queue->msgs[queue->cur]; + if (crq->generic.first & IBMVNIC_CRQ_CMD_RSP) { + if (++queue->cur == queue->size) + queue->cur = 0; + } else { + crq = NULL; + } + + return crq; +} + +static void print_subcrq_error(struct device *dev, int rc, const char *func) +{ + switch (rc) { + case H_PARAMETER: + dev_warn_ratelimited(dev, + "%s failed: Send request is malformed or adapter failover pending. (rc=%d)\n", + func, rc); + break; + case H_CLOSED: + dev_warn_ratelimited(dev, + "%s failed: Backing queue closed. Adapter is down or failover pending. 
(rc=%d)\n", + func, rc); + break; + default: + dev_err_ratelimited(dev, "%s failed: (rc=%d)\n", func, rc); + break; + } +} + +static int send_subcrq_indirect(struct ibmvnic_adapter *adapter, + u64 remote_handle, u64 ioba, u64 num_entries) +{ + unsigned int ua = adapter->vdev->unit_address; + struct device *dev = &adapter->vdev->dev; + int rc; + + /* Make sure the hypervisor sees the complete request */ + dma_wmb(); + rc = plpar_hcall_norets(H_SEND_SUB_CRQ_INDIRECT, ua, + cpu_to_be64(remote_handle), + ioba, num_entries); + + if (rc) + print_subcrq_error(dev, rc, __func__); + + return rc; +} + +static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, + union ibmvnic_crq *crq) +{ + unsigned int ua = adapter->vdev->unit_address; + struct device *dev = &adapter->vdev->dev; + u64 *u64_crq = (u64 *)crq; + int rc; + + netdev_dbg(adapter->netdev, "Sending CRQ: %016lx %016lx\n", + (unsigned long)cpu_to_be64(u64_crq[0]), + (unsigned long)cpu_to_be64(u64_crq[1])); + + if (!adapter->crq.active && + crq->generic.first != IBMVNIC_CRQ_INIT_CMD) { + dev_warn(dev, "Invalid request detected while CRQ is inactive, possible device state change during reset\n"); + return -EINVAL; + } + + /* Make sure the hypervisor sees the complete request */ + dma_wmb(); + + rc = plpar_hcall_norets(H_SEND_CRQ, ua, + cpu_to_be64(u64_crq[0]), + cpu_to_be64(u64_crq[1])); + + if (rc) { + if (rc == H_CLOSED) { + dev_warn(dev, "CRQ Queue closed\n"); + /* do not reset, report the fail, wait for passive init from server */ + } + + dev_warn(dev, "Send error (rc=%d)\n", rc); + } + + return rc; +} + +static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + union ibmvnic_crq crq; + int retries = 100; + int rc; + + memset(&crq, 0, sizeof(crq)); + crq.generic.first = IBMVNIC_CRQ_INIT_CMD; + crq.generic.cmd = IBMVNIC_CRQ_INIT; + netdev_dbg(adapter->netdev, "Sending CRQ init\n"); + + do { + rc = ibmvnic_send_crq(adapter, &crq); + if (rc != H_CLOSED) + 
break; + retries--; + msleep(50); + + } while (retries > 0); + + if (rc) { + dev_err(dev, "Failed to send init request, rc = %d\n", rc); + return rc; + } + + return 0; +} + +struct vnic_login_client_data { + u8 type; + __be16 len; + char name[]; +} __packed; + +static int vnic_client_data_len(struct ibmvnic_adapter *adapter) +{ + int len; + + /* Calculate the amount of buffer space needed for the + * vnic client data in the login buffer. There are four entries, + * OS name, LPAR name, device name, and a null last entry. + */ + len = 4 * sizeof(struct vnic_login_client_data); + len += 6; /* "Linux" plus NULL */ + len += strlen(utsname()->nodename) + 1; + len += strlen(adapter->netdev->name) + 1; + + return len; +} + +static void vnic_add_client_data(struct ibmvnic_adapter *adapter, + struct vnic_login_client_data *vlcd) +{ + const char *os_name = "Linux"; + int len; + + /* Type 1 - LPAR OS */ + vlcd->type = 1; + len = strlen(os_name) + 1; + vlcd->len = cpu_to_be16(len); + strscpy(vlcd->name, os_name, len); + vlcd = (struct vnic_login_client_data *)(vlcd->name + len); + + /* Type 2 - LPAR name */ + vlcd->type = 2; + len = strlen(utsname()->nodename) + 1; + vlcd->len = cpu_to_be16(len); + strscpy(vlcd->name, utsname()->nodename, len); + vlcd = (struct vnic_login_client_data *)(vlcd->name + len); + + /* Type 3 - device name */ + vlcd->type = 3; + len = strlen(adapter->netdev->name) + 1; + vlcd->len = cpu_to_be16(len); + strscpy(vlcd->name, adapter->netdev->name, len); +} + +static int send_login(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_login_rsp_buffer *login_rsp_buffer; + struct ibmvnic_login_buffer *login_buffer; + struct device *dev = &adapter->vdev->dev; + struct vnic_login_client_data *vlcd; + dma_addr_t rsp_buffer_token; + dma_addr_t buffer_token; + size_t rsp_buffer_size; + union ibmvnic_crq crq; + int client_data_len; + size_t buffer_size; + __be64 *tx_list_p; + __be64 *rx_list_p; + int rc; + int i; + + if (!adapter->tx_scrq || !adapter->rx_scrq) { 
+ netdev_err(adapter->netdev, + "RX or TX queues are not allocated, device login failed\n"); + return -ENOMEM; + } + + release_login_buffer(adapter); + release_login_rsp_buffer(adapter); + + client_data_len = vnic_client_data_len(adapter); + + buffer_size = + sizeof(struct ibmvnic_login_buffer) + + sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues) + + client_data_len; + + login_buffer = kzalloc(buffer_size, GFP_ATOMIC); + if (!login_buffer) + goto buf_alloc_failed; + + buffer_token = dma_map_single(dev, login_buffer, buffer_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, buffer_token)) { + dev_err(dev, "Couldn't map login buffer\n"); + goto buf_map_failed; + } + + rsp_buffer_size = sizeof(struct ibmvnic_login_rsp_buffer) + + sizeof(u64) * adapter->req_tx_queues + + sizeof(u64) * adapter->req_rx_queues + + sizeof(u64) * adapter->req_rx_queues + + sizeof(u8) * IBMVNIC_TX_DESC_VERSIONS; + + login_rsp_buffer = kmalloc(rsp_buffer_size, GFP_ATOMIC); + if (!login_rsp_buffer) + goto buf_rsp_alloc_failed; + + rsp_buffer_token = dma_map_single(dev, login_rsp_buffer, + rsp_buffer_size, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, rsp_buffer_token)) { + dev_err(dev, "Couldn't map login rsp buffer\n"); + goto buf_rsp_map_failed; + } + + adapter->login_buf = login_buffer; + adapter->login_buf_token = buffer_token; + adapter->login_buf_sz = buffer_size; + adapter->login_rsp_buf = login_rsp_buffer; + adapter->login_rsp_buf_token = rsp_buffer_token; + adapter->login_rsp_buf_sz = rsp_buffer_size; + + login_buffer->len = cpu_to_be32(buffer_size); + login_buffer->version = cpu_to_be32(INITIAL_VERSION_LB); + login_buffer->num_txcomp_subcrqs = cpu_to_be32(adapter->req_tx_queues); + login_buffer->off_txcomp_subcrqs = + cpu_to_be32(sizeof(struct ibmvnic_login_buffer)); + login_buffer->num_rxcomp_subcrqs = cpu_to_be32(adapter->req_rx_queues); + login_buffer->off_rxcomp_subcrqs = + cpu_to_be32(sizeof(struct ibmvnic_login_buffer) + + sizeof(u64) * 
adapter->req_tx_queues); + login_buffer->login_rsp_ioba = cpu_to_be32(rsp_buffer_token); + login_buffer->login_rsp_len = cpu_to_be32(rsp_buffer_size); + + tx_list_p = (__be64 *)((char *)login_buffer + + sizeof(struct ibmvnic_login_buffer)); + rx_list_p = (__be64 *)((char *)login_buffer + + sizeof(struct ibmvnic_login_buffer) + + sizeof(u64) * adapter->req_tx_queues); + + for (i = 0; i < adapter->req_tx_queues; i++) { + if (adapter->tx_scrq[i]) { + tx_list_p[i] = + cpu_to_be64(adapter->tx_scrq[i]->crq_num); + } + } + + for (i = 0; i < adapter->req_rx_queues; i++) { + if (adapter->rx_scrq[i]) { + rx_list_p[i] = + cpu_to_be64(adapter->rx_scrq[i]->crq_num); + } + } + + /* Insert vNIC login client data */ + vlcd = (struct vnic_login_client_data *) + ((char *)rx_list_p + (sizeof(u64) * adapter->req_rx_queues)); + login_buffer->client_data_offset = + cpu_to_be32((char *)vlcd - (char *)login_buffer); + login_buffer->client_data_len = cpu_to_be32(client_data_len); + + vnic_add_client_data(adapter, vlcd); + + netdev_dbg(adapter->netdev, "Login Buffer:\n"); + for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) { + netdev_dbg(adapter->netdev, "%016lx\n", + ((unsigned long *)(adapter->login_buf))[i]); + } + + memset(&crq, 0, sizeof(crq)); + crq.login.first = IBMVNIC_CRQ_CMD; + crq.login.cmd = LOGIN; + crq.login.ioba = cpu_to_be32(buffer_token); + crq.login.len = cpu_to_be32(buffer_size); + + adapter->login_pending = true; + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + adapter->login_pending = false; + netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc); + goto buf_send_failed; + } + + return 0; + +buf_send_failed: + dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size, + DMA_FROM_DEVICE); +buf_rsp_map_failed: + kfree(login_rsp_buffer); + adapter->login_rsp_buf = NULL; +buf_rsp_alloc_failed: + dma_unmap_single(dev, buffer_token, buffer_size, DMA_TO_DEVICE); +buf_map_failed: + kfree(login_buffer); + adapter->login_buf = NULL; +buf_alloc_failed: + 
return -ENOMEM; +} + +static int send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr, + u32 len, u8 map_id) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.request_map.first = IBMVNIC_CRQ_CMD; + crq.request_map.cmd = REQUEST_MAP; + crq.request_map.map_id = map_id; + crq.request_map.ioba = cpu_to_be32(addr); + crq.request_map.len = cpu_to_be32(len); + return ibmvnic_send_crq(adapter, &crq); +} + +static int send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.request_unmap.first = IBMVNIC_CRQ_CMD; + crq.request_unmap.cmd = REQUEST_UNMAP; + crq.request_unmap.map_id = map_id; + return ibmvnic_send_crq(adapter, &crq); +} + +static void send_query_map(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.query_map.first = IBMVNIC_CRQ_CMD; + crq.query_map.cmd = QUERY_MAP; + ibmvnic_send_crq(adapter, &crq); +} + +/* Send a series of CRQs requesting various capabilities of the VNIC server */ +static void send_query_cap(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + int cap_reqs; + + /* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count + * upfront. When the tasklet receives a response to all of these, it + * can send out the next protocol messaage (REQUEST_CAPABILITY). 
+ */ + cap_reqs = 25; + + atomic_set(&adapter->running_cap_crqs, cap_reqs); + + memset(&crq, 0, sizeof(crq)); + crq.query_capability.first = IBMVNIC_CRQ_CMD; + crq.query_capability.cmd = QUERY_CAPABILITY; + + crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MIN_MTU); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_MTU); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS); + ibmvnic_send_crq(adapter, &crq); + 
cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ); + + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. 
+ */ + WARN_ON(cap_reqs != 0); +} +
/*
 * send_query_ip_offload() - map the IP offload query buffer and send
 * QUERY_IP_OFFLOAD.
 *
 * adapter->ip_offload_buf is DMA-mapped (DMA_FROM_DEVICE) and the resulting
 * token is stored in adapter->ip_offload_tok.  When the mapping fails no CRQ
 * is sent; the failure is logged only if FW_FEATURE_CMO is absent.  The
 * result of ibmvnic_send_crq() is not checked.
 */
static void send_query_ip_offload(struct ibmvnic_adapter *adapter)
{
	struct device *dev = &adapter->vdev->dev;
	int buf_sz = sizeof(struct ibmvnic_query_ip_offload_buffer);
	union ibmvnic_crq msg;
	dma_addr_t tok;

	tok = dma_map_single(dev, &adapter->ip_offload_buf, buf_sz,
			     DMA_FROM_DEVICE);
	adapter->ip_offload_tok = tok;

	if (dma_mapping_error(dev, adapter->ip_offload_tok)) {
		if (!firmware_has_feature(FW_FEATURE_CMO))
			dev_err(dev, "Couldn't map offload buffer\n");
		return;
	}

	memset(&msg, 0, sizeof(msg));
	msg.query_ip_offload.first = IBMVNIC_CRQ_CMD;
	msg.query_ip_offload.cmd = QUERY_IP_OFFLOAD;
	msg.query_ip_offload.ioba = cpu_to_be32(adapter->ip_offload_tok);
	msg.query_ip_offload.len = cpu_to_be32(buf_sz);

	ibmvnic_send_crq(adapter, &msg);
}

+static void send_control_ip_offload(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_control_ip_offload_buffer *ctrl_buf = &adapter->ip_offload_ctrl; + struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; + struct device *dev = &adapter->vdev->dev; + netdev_features_t old_hw_features = 0; + union ibmvnic_crq crq; + + adapter->ip_offload_ctrl_tok = + dma_map_single(dev, + ctrl_buf, + sizeof(adapter->ip_offload_ctrl), + DMA_TO_DEVICE); + + if (dma_mapping_error(dev, adapter->ip_offload_ctrl_tok)) { + dev_err(dev, "Couldn't map ip offload control buffer\n"); + return; + } + + ctrl_buf->len = cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); + ctrl_buf->version = cpu_to_be32(INITIAL_VERSION_IOB); + ctrl_buf->ipv4_chksum = buf->ipv4_chksum; + ctrl_buf->ipv6_chksum = buf->ipv6_chksum; + ctrl_buf->tcp_ipv4_chksum = buf->tcp_ipv4_chksum; + ctrl_buf->udp_ipv4_chksum = buf->udp_ipv4_chksum; + ctrl_buf->tcp_ipv6_chksum = buf->tcp_ipv6_chksum; + ctrl_buf->udp_ipv6_chksum = buf->udp_ipv6_chksum; + ctrl_buf->large_tx_ipv4 = buf->large_tx_ipv4; + ctrl_buf->large_tx_ipv6 = buf->large_tx_ipv6; + + /* large_rx disabled for now, additional features needed */ +
ctrl_buf->large_rx_ipv4 = 0; + ctrl_buf->large_rx_ipv6 = 0; + + if (adapter->state != VNIC_PROBING) { + old_hw_features = adapter->netdev->hw_features; + adapter->netdev->hw_features = 0; + } + + adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; + + if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) + adapter->netdev->hw_features |= NETIF_F_IP_CSUM; + + if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) + adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; + + if ((adapter->netdev->features & + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) + adapter->netdev->hw_features |= NETIF_F_RXCSUM; + + if (buf->large_tx_ipv4) + adapter->netdev->hw_features |= NETIF_F_TSO; + if (buf->large_tx_ipv6) + adapter->netdev->hw_features |= NETIF_F_TSO6; + + if (adapter->state == VNIC_PROBING) { + adapter->netdev->features |= adapter->netdev->hw_features; + } else if (old_hw_features != adapter->netdev->hw_features) { + netdev_features_t tmp = 0; + + /* disable features no longer supported */ + adapter->netdev->features &= adapter->netdev->hw_features; + /* turn on features now supported if previously enabled */ + tmp = (old_hw_features ^ adapter->netdev->hw_features) & + adapter->netdev->hw_features; + adapter->netdev->features |= + tmp & adapter->netdev->wanted_features; + } + + memset(&crq, 0, sizeof(crq)); + crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; + crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; + crq.control_ip_offload.len = + cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); + crq.control_ip_offload.ioba = cpu_to_be32(adapter->ip_offload_ctrl_tok); + ibmvnic_send_crq(adapter, &crq); +} + +static void handle_vpd_size_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + + if (crq->get_vpd_size_rsp.rc.code) { + dev_err(dev, "Error retrieving VPD size, rc=%x\n", + crq->get_vpd_size_rsp.rc.code); + complete(&adapter->fw_done); + return; + } + + adapter->vpd->len = be64_to_cpu(crq->get_vpd_size_rsp.len); + 
complete(&adapter->fw_done); +} +
/*
 * handle_vpd_rsp() - process a GET_VPD response and extract the FW level.
 * @crq:     the response CRQ; only its return code is examined here
 * @adapter: adapter whose vpd buffer was filled by the device
 *
 * Unmaps the VPD buffer, then looks for the ASCII marker "RM" inside it.
 * The byte after the marker is taken as the length of the firmware level
 * string that follows.  That string is copied into adapter->fw_version,
 * truncated if necessary so it always fits and stays NUL-terminated.
 * If nothing usable was found, fw_version is set to "N/A".
 *
 * adapter->fw_done is completed on every path.
 */
static void handle_vpd_rsp(union ibmvnic_crq *crq,
			   struct ibmvnic_adapter *adapter)
{
	struct device *dev = &adapter->vdev->dev;
	unsigned char *substr = NULL;
	u8 fw_level_len = 0;
	size_t copy_len;

	/* Zero the whole destination so any shorter copy is terminated. */
	memset(adapter->fw_version, 0, sizeof(adapter->fw_version));

	dma_unmap_single(dev, adapter->vpd->dma_addr, adapter->vpd->len,
			 DMA_FROM_DEVICE);

	if (crq->get_vpd_rsp.rc.code) {
		dev_err(dev, "Error retrieving VPD from device, rc=%x\n",
			crq->get_vpd_rsp.rc.code);
		goto complete;
	}

	/* get the position of the firmware version info
	 * located after the ASCII 'RM' substring in the buffer
	 */
	substr = strnstr(adapter->vpd->buff, "RM", adapter->vpd->len);
	if (!substr) {
		dev_info(dev, "Warning - No FW level has been provided in the VPD buffer by the VIOS Server\n");
		goto complete;
	}

	/* get length of firmware level ASCII substring */
	if ((substr + 2) < (adapter->vpd->buff + adapter->vpd->len)) {
		fw_level_len = *(substr + 2);
	} else {
		dev_info(dev, "Length of FW substr extrapolated VDP buff\n");
		goto complete;
	}

	/* copy firmware version string from vpd into adapter */
	if ((substr + 3 + fw_level_len) <
	    (adapter->vpd->buff + adapter->vpd->len)) {
		/* fw_level_len comes from the device and can be up to 255;
		 * never write past fw_version and always keep the final
		 * byte as the terminating NUL left by the memset above.
		 */
		copy_len = fw_level_len;
		if (copy_len > sizeof(adapter->fw_version) - 1)
			copy_len = sizeof(adapter->fw_version) - 1;
		strncpy((char *)adapter->fw_version, substr + 3, copy_len);
	} else {
		dev_info(dev, "FW substr extrapolated VPD buff\n");
	}

complete:
	if (adapter->fw_version[0] == '\0')
		strscpy((char *)adapter->fw_version, "N/A", sizeof(adapter->fw_version));
	complete(&adapter->fw_done);
}

+static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; + int i; + + dma_unmap_single(dev, adapter->ip_offload_tok, + sizeof(adapter->ip_offload_buf), DMA_FROM_DEVICE); + + netdev_dbg(adapter->netdev, "Query IP Offload Buffer:\n"); + for (i = 0; i < (sizeof(adapter->ip_offload_buf) - 1) / 8 + 1; i++) + netdev_dbg(adapter->netdev, "%016lx\n", + ((unsigned
long *)(buf))[i]); + + netdev_dbg(adapter->netdev, "ipv4_chksum = %d\n", buf->ipv4_chksum); + netdev_dbg(adapter->netdev, "ipv6_chksum = %d\n", buf->ipv6_chksum); + netdev_dbg(adapter->netdev, "tcp_ipv4_chksum = %d\n", + buf->tcp_ipv4_chksum); + netdev_dbg(adapter->netdev, "tcp_ipv6_chksum = %d\n", + buf->tcp_ipv6_chksum); + netdev_dbg(adapter->netdev, "udp_ipv4_chksum = %d\n", + buf->udp_ipv4_chksum); + netdev_dbg(adapter->netdev, "udp_ipv6_chksum = %d\n", + buf->udp_ipv6_chksum); + netdev_dbg(adapter->netdev, "large_tx_ipv4 = %d\n", + buf->large_tx_ipv4); + netdev_dbg(adapter->netdev, "large_tx_ipv6 = %d\n", + buf->large_tx_ipv6); + netdev_dbg(adapter->netdev, "large_rx_ipv4 = %d\n", + buf->large_rx_ipv4); + netdev_dbg(adapter->netdev, "large_rx_ipv6 = %d\n", + buf->large_rx_ipv6); + netdev_dbg(adapter->netdev, "max_ipv4_hdr_sz = %d\n", + buf->max_ipv4_header_size); + netdev_dbg(adapter->netdev, "max_ipv6_hdr_sz = %d\n", + buf->max_ipv6_header_size); + netdev_dbg(adapter->netdev, "max_tcp_hdr_size = %d\n", + buf->max_tcp_header_size); + netdev_dbg(adapter->netdev, "max_udp_hdr_size = %d\n", + buf->max_udp_header_size); + netdev_dbg(adapter->netdev, "max_large_tx_size = %d\n", + buf->max_large_tx_size); + netdev_dbg(adapter->netdev, "max_large_rx_size = %d\n", + buf->max_large_rx_size); + netdev_dbg(adapter->netdev, "ipv6_ext_hdr = %d\n", + buf->ipv6_extension_header); + netdev_dbg(adapter->netdev, "tcp_pseudosum_req = %d\n", + buf->tcp_pseudosum_req); + netdev_dbg(adapter->netdev, "num_ipv6_ext_hd = %d\n", + buf->num_ipv6_ext_headers); + netdev_dbg(adapter->netdev, "off_ipv6_ext_hd = %d\n", + buf->off_ipv6_ext_headers); + + send_control_ip_offload(adapter); +} + +static const char *ibmvnic_fw_err_cause(u16 cause) +{ + switch (cause) { + case ADAPTER_PROBLEM: + return "adapter problem"; + case BUS_PROBLEM: + return "bus problem"; + case FW_PROBLEM: + return "firmware problem"; + case DD_PROBLEM: + return "device driver problem"; + case EEH_RECOVERY: + return "EEH 
recovery"; + case FW_UPDATED: + return "firmware updated"; + case LOW_MEMORY: + return "low Memory"; + default: + return "unknown"; + } +} + +static void handle_error_indication(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + u16 cause; + + cause = be16_to_cpu(crq->error_indication.error_cause); + + dev_warn_ratelimited(dev, + "Firmware reports %serror, cause: %s. Starting recovery...\n", + crq->error_indication.flags + & IBMVNIC_FATAL_ERROR ? "FATAL " : "", + ibmvnic_fw_err_cause(cause)); + + if (crq->error_indication.flags & IBMVNIC_FATAL_ERROR) + ibmvnic_reset(adapter, VNIC_RESET_FATAL); + else + ibmvnic_reset(adapter, VNIC_RESET_NON_FATAL); +} + +static int handle_change_mac_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->vdev->dev; + long rc; + + rc = crq->change_mac_addr_rsp.rc.code; + if (rc) { + dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc); + goto out; + } + /* crq->change_mac_addr.mac_addr is the requested one + * crq->change_mac_addr_rsp.mac_addr is the returned valid one. 
+ */ + eth_hw_addr_set(netdev, &crq->change_mac_addr_rsp.mac_addr[0]); + ether_addr_copy(adapter->mac_addr, + &crq->change_mac_addr_rsp.mac_addr[0]); +out: + complete(&adapter->fw_done); + return rc; +} + +static void handle_request_cap_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + u64 *req_value; + char *name; + + atomic_dec(&adapter->running_cap_crqs); + netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n", + atomic_read(&adapter->running_cap_crqs)); + switch (be16_to_cpu(crq->request_capability_rsp.capability)) { + case REQ_TX_QUEUES: + req_value = &adapter->req_tx_queues; + name = "tx"; + break; + case REQ_RX_QUEUES: + req_value = &adapter->req_rx_queues; + name = "rx"; + break; + case REQ_RX_ADD_QUEUES: + req_value = &adapter->req_rx_add_queues; + name = "rx_add"; + break; + case REQ_TX_ENTRIES_PER_SUBCRQ: + req_value = &adapter->req_tx_entries_per_subcrq; + name = "tx_entries_per_subcrq"; + break; + case REQ_RX_ADD_ENTRIES_PER_SUBCRQ: + req_value = &adapter->req_rx_add_entries_per_subcrq; + name = "rx_add_entries_per_subcrq"; + break; + case REQ_MTU: + req_value = &adapter->req_mtu; + name = "mtu"; + break; + case PROMISC_REQUESTED: + req_value = &adapter->promisc; + name = "promisc"; + break; + default: + dev_err(dev, "Got invalid cap request rsp %d\n", + crq->request_capability.capability); + return; + } + + switch (crq->request_capability_rsp.rc.code) { + case SUCCESS: + break; + case PARTIALSUCCESS: + dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n", + *req_value, + (long)be64_to_cpu(crq->request_capability_rsp.number), + name); + + if (be16_to_cpu(crq->request_capability_rsp.capability) == + REQ_MTU) { + pr_err("mtu of %llu is not supported. 
Reverting.\n", + *req_value); + *req_value = adapter->fallback.mtu; + } else { + *req_value = + be64_to_cpu(crq->request_capability_rsp.number); + } + + send_request_cap(adapter, 1); + return; + default: + dev_err(dev, "Error %d in request cap rsp\n", + crq->request_capability_rsp.rc.code); + return; + } + + /* Done receiving requested capabilities, query IP offload support */ + if (atomic_read(&adapter->running_cap_crqs) == 0) + send_query_ip_offload(adapter); +} + +static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + struct net_device *netdev = adapter->netdev; + struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf; + struct ibmvnic_login_buffer *login = adapter->login_buf; + u64 *tx_handle_array; + u64 *rx_handle_array; + int num_tx_pools; + int num_rx_pools; + u64 *size_array; + u32 rsp_len; + int i; + + /* CHECK: Test/set of login_pending does not need to be atomic + * because only ibmvnic_tasklet tests/clears this. + */ + if (!adapter->login_pending) { + netdev_warn(netdev, "Ignoring unexpected login response\n"); + return 0; + } + adapter->login_pending = false; + + /* If the number of queues requested can't be allocated by the + * server, the login response will return with code 1. We will need + * to resend the login buffer with fewer queues requested. 
+ */ + if (login_rsp_crq->generic.rc.code) { + adapter->init_done_rc = login_rsp_crq->generic.rc.code; + complete(&adapter->init_done); + return 0; + } + + if (adapter->failover_pending) { + adapter->init_done_rc = -EAGAIN; + netdev_dbg(netdev, "Failover pending, ignoring login response\n"); + complete(&adapter->init_done); + /* login response buffer will be released on reset */ + return 0; + } + + netdev->mtu = adapter->req_mtu - ETH_HLEN; + + netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); + for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) { + netdev_dbg(adapter->netdev, "%016lx\n", + ((unsigned long *)(adapter->login_rsp_buf))[i]); + } + + /* Sanity checks */ + if (login->num_txcomp_subcrqs != login_rsp->num_txsubm_subcrqs || + (be32_to_cpu(login->num_rxcomp_subcrqs) * + adapter->req_rx_add_queues != + be32_to_cpu(login_rsp->num_rxadd_subcrqs))) { + dev_err(dev, "FATAL: Inconsistent login and login rsp\n"); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); + return -EIO; + } + + rsp_len = be32_to_cpu(login_rsp->len); + if (be32_to_cpu(login->login_rsp_len) < rsp_len || + rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) || + rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) || + rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) || + rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) { + /* This can happen if a login request times out and there are + * 2 outstanding login requests sent, the LOGIN_RSP crq + * could have been for the older login request. So we are + * parsing the newer response buffer which may be incomplete + */ + dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n"); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); + return -EIO; + } + + size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); + /* variable buffer sizes are not supported, so just read the + * first entry. 
+ */ + adapter->cur_rx_buf_sz = be64_to_cpu(size_array[0]); + + num_tx_pools = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); + num_rx_pools = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); + + tx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs)); + rx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_subcrqs)); + + for (i = 0; i < num_tx_pools; i++) + adapter->tx_scrq[i]->handle = tx_handle_array[i]; + + for (i = 0; i < num_rx_pools; i++) + adapter->rx_scrq[i]->handle = rx_handle_array[i]; + + adapter->num_active_tx_scrqs = num_tx_pools; + adapter->num_active_rx_scrqs = num_rx_pools; + release_login_rsp_buffer(adapter); + release_login_buffer(adapter); + complete(&adapter->init_done); + + return 0; +} + +static void handle_request_unmap_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + long rc; + + rc = crq->request_unmap_rsp.rc.code; + if (rc) + dev_err(dev, "Error %ld in REQUEST_UNMAP_RSP\n", rc); +} + +static void handle_query_map_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->vdev->dev; + long rc; + + rc = crq->query_map_rsp.rc.code; + if (rc) { + dev_err(dev, "Error %ld in QUERY_MAP_RSP\n", rc); + return; + } + netdev_dbg(netdev, "page_size = %d\ntot_pages = %u\nfree_pages = %u\n", + crq->query_map_rsp.page_size, + __be32_to_cpu(crq->query_map_rsp.tot_pages), + __be32_to_cpu(crq->query_map_rsp.free_pages)); +} + +static void handle_query_cap_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->vdev->dev; + long rc; + + atomic_dec(&adapter->running_cap_crqs); + netdev_dbg(netdev, "Outstanding queries: %d\n", + atomic_read(&adapter->running_cap_crqs)); + rc = 
crq->query_capability.rc.code; + if (rc) { + dev_err(dev, "Error %ld in QUERY_CAP_RSP\n", rc); + goto out; + } + + switch (be16_to_cpu(crq->query_capability.capability)) { + case MIN_TX_QUEUES: + adapter->min_tx_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "min_tx_queues = %lld\n", + adapter->min_tx_queues); + break; + case MIN_RX_QUEUES: + adapter->min_rx_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "min_rx_queues = %lld\n", + adapter->min_rx_queues); + break; + case MIN_RX_ADD_QUEUES: + adapter->min_rx_add_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "min_rx_add_queues = %lld\n", + adapter->min_rx_add_queues); + break; + case MAX_TX_QUEUES: + adapter->max_tx_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_tx_queues = %lld\n", + adapter->max_tx_queues); + break; + case MAX_RX_QUEUES: + adapter->max_rx_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_rx_queues = %lld\n", + adapter->max_rx_queues); + break; + case MAX_RX_ADD_QUEUES: + adapter->max_rx_add_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_rx_add_queues = %lld\n", + adapter->max_rx_add_queues); + break; + case MIN_TX_ENTRIES_PER_SUBCRQ: + adapter->min_tx_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "min_tx_entries_per_subcrq = %lld\n", + adapter->min_tx_entries_per_subcrq); + break; + case MIN_RX_ADD_ENTRIES_PER_SUBCRQ: + adapter->min_rx_add_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "min_rx_add_entrs_per_subcrq = %lld\n", + adapter->min_rx_add_entries_per_subcrq); + break; + case MAX_TX_ENTRIES_PER_SUBCRQ: + adapter->max_tx_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_tx_entries_per_subcrq = %lld\n", + adapter->max_tx_entries_per_subcrq); + break; + case MAX_RX_ADD_ENTRIES_PER_SUBCRQ: + 
adapter->max_rx_add_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_rx_add_entrs_per_subcrq = %lld\n", + adapter->max_rx_add_entries_per_subcrq); + break; + case TCP_IP_OFFLOAD: + adapter->tcp_ip_offload = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "tcp_ip_offload = %lld\n", + adapter->tcp_ip_offload); + break; + case PROMISC_SUPPORTED: + adapter->promisc_supported = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "promisc_supported = %lld\n", + adapter->promisc_supported); + break; + case MIN_MTU: + adapter->min_mtu = be64_to_cpu(crq->query_capability.number); + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; + netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu); + break; + case MAX_MTU: + adapter->max_mtu = be64_to_cpu(crq->query_capability.number); + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; + netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu); + break; + case MAX_MULTICAST_FILTERS: + adapter->max_multicast_filters = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_multicast_filters = %lld\n", + adapter->max_multicast_filters); + break; + case VLAN_HEADER_INSERTION: + adapter->vlan_header_insertion = + be64_to_cpu(crq->query_capability.number); + if (adapter->vlan_header_insertion) + netdev->features |= NETIF_F_HW_VLAN_STAG_TX; + netdev_dbg(netdev, "vlan_header_insertion = %lld\n", + adapter->vlan_header_insertion); + break; + case RX_VLAN_HEADER_INSERTION: + adapter->rx_vlan_header_insertion = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "rx_vlan_header_insertion = %lld\n", + adapter->rx_vlan_header_insertion); + break; + case MAX_TX_SG_ENTRIES: + adapter->max_tx_sg_entries = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_tx_sg_entries = %lld\n", + adapter->max_tx_sg_entries); + break; + case RX_SG_SUPPORTED: + adapter->rx_sg_supported = + be64_to_cpu(crq->query_capability.number); + 
netdev_dbg(netdev, "rx_sg_supported = %lld\n", + adapter->rx_sg_supported); + break; + case OPT_TX_COMP_SUB_QUEUES: + adapter->opt_tx_comp_sub_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "opt_tx_comp_sub_queues = %lld\n", + adapter->opt_tx_comp_sub_queues); + break; + case OPT_RX_COMP_QUEUES: + adapter->opt_rx_comp_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "opt_rx_comp_queues = %lld\n", + adapter->opt_rx_comp_queues); + break; + case OPT_RX_BUFADD_Q_PER_RX_COMP_Q: + adapter->opt_rx_bufadd_q_per_rx_comp_q = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "opt_rx_bufadd_q_per_rx_comp_q = %lld\n", + adapter->opt_rx_bufadd_q_per_rx_comp_q); + break; + case OPT_TX_ENTRIES_PER_SUBCRQ: + adapter->opt_tx_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "opt_tx_entries_per_subcrq = %lld\n", + adapter->opt_tx_entries_per_subcrq); + break; + case OPT_RXBA_ENTRIES_PER_SUBCRQ: + adapter->opt_rxba_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "opt_rxba_entries_per_subcrq = %lld\n", + adapter->opt_rxba_entries_per_subcrq); + break; + case TX_RX_DESC_REQ: + adapter->tx_rx_desc_req = crq->query_capability.number; + netdev_dbg(netdev, "tx_rx_desc_req = %llx\n", + adapter->tx_rx_desc_req); + break; + + default: + netdev_err(netdev, "Got invalid cap rsp %d\n", + crq->query_capability.capability); + } + +out: + if (atomic_read(&adapter->running_cap_crqs) == 0) + send_request_cap(adapter, 0); +} + +static int send_query_phys_parms(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + int rc; + + memset(&crq, 0, sizeof(crq)); + crq.query_phys_parms.first = IBMVNIC_CRQ_CMD; + crq.query_phys_parms.cmd = QUERY_PHYS_PARMS; + + mutex_lock(&adapter->fw_lock); + adapter->fw_done_rc = 0; + reinit_completion(&adapter->fw_done); + + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + mutex_unlock(&adapter->fw_lock); + return rc; + 
} + + rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); + if (rc) { + mutex_unlock(&adapter->fw_lock); + return rc; + } + + mutex_unlock(&adapter->fw_lock); + return adapter->fw_done_rc ? -EIO : 0; +} + +static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int rc; + __be32 rspeed = cpu_to_be32(crq->query_phys_parms_rsp.speed); + + rc = crq->query_phys_parms_rsp.rc.code; + if (rc) { + netdev_err(netdev, "Error %d in QUERY_PHYS_PARMS\n", rc); + return rc; + } + switch (rspeed) { + case IBMVNIC_10MBPS: + adapter->speed = SPEED_10; + break; + case IBMVNIC_100MBPS: + adapter->speed = SPEED_100; + break; + case IBMVNIC_1GBPS: + adapter->speed = SPEED_1000; + break; + case IBMVNIC_10GBPS: + adapter->speed = SPEED_10000; + break; + case IBMVNIC_25GBPS: + adapter->speed = SPEED_25000; + break; + case IBMVNIC_40GBPS: + adapter->speed = SPEED_40000; + break; + case IBMVNIC_50GBPS: + adapter->speed = SPEED_50000; + break; + case IBMVNIC_100GBPS: + adapter->speed = SPEED_100000; + break; + case IBMVNIC_200GBPS: + adapter->speed = SPEED_200000; + break; + default: + if (netif_carrier_ok(netdev)) + netdev_warn(netdev, "Unknown speed 0x%08x\n", rspeed); + adapter->speed = SPEED_UNKNOWN; + } + if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_FULL_DUPLEX) + adapter->duplex = DUPLEX_FULL; + else if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_HALF_DUPLEX) + adapter->duplex = DUPLEX_HALF; + else + adapter->duplex = DUPLEX_UNKNOWN; + + return rc; +} + +static void ibmvnic_handle_crq(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_generic_crq *gen_crq = &crq->generic; + struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->vdev->dev; + u64 *u64_crq = (u64 *)crq; + long rc; + + netdev_dbg(netdev, "Handling CRQ: %016lx %016lx\n", + (unsigned long)cpu_to_be64(u64_crq[0]), + (unsigned long)cpu_to_be64(u64_crq[1])); + switch 
(gen_crq->first) { + case IBMVNIC_CRQ_INIT_RSP: + switch (gen_crq->cmd) { + case IBMVNIC_CRQ_INIT: + dev_info(dev, "Partner initialized\n"); + adapter->from_passive_init = true; + /* Discard any stale login responses from prev reset. + * CHECK: should we clear even on INIT_COMPLETE? + */ + adapter->login_pending = false; + + if (adapter->state == VNIC_DOWN) + rc = ibmvnic_reset(adapter, VNIC_RESET_PASSIVE_INIT); + else + rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + + if (rc && rc != -EBUSY) { + /* We were unable to schedule the failover + * reset either because the adapter was still + * probing (eg: during kexec) or we could not + * allocate memory. Clear the failover_pending + * flag since no one else will. We ignore + * EBUSY because it means either FAILOVER reset + * is already scheduled or the adapter is + * being removed. + */ + netdev_err(netdev, + "Error %ld scheduling failover reset\n", + rc); + adapter->failover_pending = false; + } + + if (!completion_done(&adapter->init_done)) { + if (!adapter->init_done_rc) + adapter->init_done_rc = -EAGAIN; + complete(&adapter->init_done); + } + + break; + case IBMVNIC_CRQ_INIT_COMPLETE: + dev_info(dev, "Partner initialization complete\n"); + adapter->crq.active = true; + send_version_xchg(adapter); + break; + default: + dev_err(dev, "Unknown crq cmd: %d\n", gen_crq->cmd); + } + return; + case IBMVNIC_CRQ_XPORT_EVENT: + netif_carrier_off(netdev); + adapter->crq.active = false; + /* terminate any thread waiting for a response + * from the device + */ + if (!completion_done(&adapter->fw_done)) { + adapter->fw_done_rc = -EIO; + complete(&adapter->fw_done); + } + + /* if we got here during crq-init, retry crq-init */ + if (!completion_done(&adapter->init_done)) { + adapter->init_done_rc = -EAGAIN; + complete(&adapter->init_done); + } + + if (!completion_done(&adapter->stats_done)) + complete(&adapter->stats_done); + if (test_bit(0, &adapter->resetting)) + adapter->force_reset_recovery = true; + if (gen_crq->cmd == 
IBMVNIC_PARTITION_MIGRATED) { + dev_info(dev, "Migrated, re-enabling adapter\n"); + ibmvnic_reset(adapter, VNIC_RESET_MOBILITY); + } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) { + dev_info(dev, "Backing device failover detected\n"); + adapter->failover_pending = true; + } else { + /* The adapter lost the connection */ + dev_err(dev, "Virtual Adapter failed (rc=%d)\n", + gen_crq->cmd); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); + } + return; + case IBMVNIC_CRQ_CMD_RSP: + break; + default: + dev_err(dev, "Got an invalid msg type 0x%02x\n", + gen_crq->first); + return; + } + + switch (gen_crq->cmd) { + case VERSION_EXCHANGE_RSP: + rc = crq->version_exchange_rsp.rc.code; + if (rc) { + dev_err(dev, "Error %ld in VERSION_EXCHG_RSP\n", rc); + break; + } + ibmvnic_version = + be16_to_cpu(crq->version_exchange_rsp.version); + dev_info(dev, "Partner protocol version is %d\n", + ibmvnic_version); + send_query_cap(adapter); + break; + case QUERY_CAPABILITY_RSP: + handle_query_cap_rsp(crq, adapter); + break; + case QUERY_MAP_RSP: + handle_query_map_rsp(crq, adapter); + break; + case REQUEST_MAP_RSP: + adapter->fw_done_rc = crq->request_map_rsp.rc.code; + complete(&adapter->fw_done); + break; + case REQUEST_UNMAP_RSP: + handle_request_unmap_rsp(crq, adapter); + break; + case REQUEST_CAPABILITY_RSP: + handle_request_cap_rsp(crq, adapter); + break; + case LOGIN_RSP: + netdev_dbg(netdev, "Got Login Response\n"); + handle_login_rsp(crq, adapter); + break; + case LOGICAL_LINK_STATE_RSP: + netdev_dbg(netdev, + "Got Logical Link State Response, state: %d rc: %d\n", + crq->logical_link_state_rsp.link_state, + crq->logical_link_state_rsp.rc.code); + adapter->logical_link_state = + crq->logical_link_state_rsp.link_state; + adapter->init_done_rc = crq->logical_link_state_rsp.rc.code; + complete(&adapter->init_done); + break; + case LINK_STATE_INDICATION: + netdev_dbg(netdev, "Got Logical Link State Indication\n"); + adapter->phys_link_state = + 
crq->link_state_indication.phys_link_state; + adapter->logical_link_state = + crq->link_state_indication.logical_link_state; + if (adapter->phys_link_state && adapter->logical_link_state) + netif_carrier_on(netdev); + else + netif_carrier_off(netdev); + break; + case CHANGE_MAC_ADDR_RSP: + netdev_dbg(netdev, "Got MAC address change Response\n"); + adapter->fw_done_rc = handle_change_mac_rsp(crq, adapter); + break; + case ERROR_INDICATION: + netdev_dbg(netdev, "Got Error Indication\n"); + handle_error_indication(crq, adapter); + break; + case REQUEST_STATISTICS_RSP: + netdev_dbg(netdev, "Got Statistics Response\n"); + complete(&adapter->stats_done); + break; + case QUERY_IP_OFFLOAD_RSP: + netdev_dbg(netdev, "Got Query IP offload Response\n"); + handle_query_ip_offload_rsp(adapter); + break; + case MULTICAST_CTRL_RSP: + netdev_dbg(netdev, "Got multicast control Response\n"); + break; + case CONTROL_IP_OFFLOAD_RSP: + netdev_dbg(netdev, "Got Control IP offload Response\n"); + dma_unmap_single(dev, adapter->ip_offload_ctrl_tok, + sizeof(adapter->ip_offload_ctrl), + DMA_TO_DEVICE); + complete(&adapter->init_done); + break; + case COLLECT_FW_TRACE_RSP: + netdev_dbg(netdev, "Got Collect firmware trace Response\n"); + complete(&adapter->fw_done); + break; + case GET_VPD_SIZE_RSP: + handle_vpd_size_rsp(crq, adapter); + break; + case GET_VPD_RSP: + handle_vpd_rsp(crq, adapter); + break; + case QUERY_PHYS_PARMS_RSP: + adapter->fw_done_rc = handle_query_phys_parms_rsp(crq, adapter); + complete(&adapter->fw_done); + break; + default: + netdev_err(netdev, "Got an invalid cmd type 0x%02x\n", + gen_crq->cmd); + } +} + +static irqreturn_t ibmvnic_interrupt(int irq, void *instance) +{ + struct ibmvnic_adapter *adapter = instance; + + tasklet_schedule(&adapter->tasklet); + return IRQ_HANDLED; +} + +static void ibmvnic_tasklet(struct tasklet_struct *t) +{ + struct ibmvnic_adapter *adapter = from_tasklet(adapter, t, tasklet); + struct ibmvnic_crq_queue *queue = &adapter->crq; + union 
ibmvnic_crq *crq; + unsigned long flags; + + spin_lock_irqsave(&queue->lock, flags); + + /* Pull all the valid messages off the CRQ */ + while ((crq = ibmvnic_next_crq(adapter)) != NULL) { + /* This barrier makes sure ibmvnic_next_crq()'s + * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded + * before ibmvnic_handle_crq()'s + * switch(gen_crq->first) and switch(gen_crq->cmd). + */ + dma_rmb(); + ibmvnic_handle_crq(crq, adapter); + crq->generic.first = 0; + } + + spin_unlock_irqrestore(&queue->lock, flags); +} + +static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *adapter) +{ + struct vio_dev *vdev = adapter->vdev; + int rc; + + do { + rc = plpar_hcall_norets(H_ENABLE_CRQ, vdev->unit_address); + } while (rc == H_IN_PROGRESS || rc == H_BUSY || H_IS_LONG_BUSY(rc)); + + if (rc) + dev_err(&vdev->dev, "Error enabling adapter (rc=%d)\n", rc); + + return rc; +} + +static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_crq_queue *crq = &adapter->crq; + struct device *dev = &adapter->vdev->dev; + struct vio_dev *vdev = adapter->vdev; + int rc; + + /* Close the CRQ */ + do { + rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address); + } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); + + /* Clean out the queue */ + if (!crq->msgs) + return -EINVAL; + + memset(crq->msgs, 0, PAGE_SIZE); + crq->cur = 0; + crq->active = false; + + /* And re-open it again */ + rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, + crq->msg_token, PAGE_SIZE); + + if (rc == H_CLOSED) + /* Adapter is good, but other end is not ready */ + dev_warn(dev, "Partner adapter not ready\n"); + else if (rc != 0) + dev_warn(dev, "Couldn't register crq (rc=%d)\n", rc); + + return rc; +} + +static void release_crq_queue(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_crq_queue *crq = &adapter->crq; + struct vio_dev *vdev = adapter->vdev; + long rc; + + if (!crq->msgs) + return; + + netdev_dbg(adapter->netdev, "Releasing CRQ\n"); + free_irq(vdev->irq, adapter); + 
tasklet_kill(&adapter->tasklet); + do { + rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address); + } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); + + dma_unmap_single(&vdev->dev, crq->msg_token, PAGE_SIZE, + DMA_BIDIRECTIONAL); + free_page((unsigned long)crq->msgs); + crq->msgs = NULL; + crq->active = false; +} + +static int init_crq_queue(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_crq_queue *crq = &adapter->crq; + struct device *dev = &adapter->vdev->dev; + struct vio_dev *vdev = adapter->vdev; + int rc, retrc = -ENOMEM; + + if (crq->msgs) + return 0; + + crq->msgs = (union ibmvnic_crq *)get_zeroed_page(GFP_KERNEL); + /* Should we allocate more than one page? */ + + if (!crq->msgs) + return -ENOMEM; + + crq->size = PAGE_SIZE / sizeof(*crq->msgs); + crq->msg_token = dma_map_single(dev, crq->msgs, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, crq->msg_token)) + goto map_failed; + + rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, + crq->msg_token, PAGE_SIZE); + + if (rc == H_RESOURCE) + /* maybe kexecing and resource is busy. try a reset */ + rc = ibmvnic_reset_crq(adapter); + retrc = rc; + + if (rc == H_CLOSED) { + dev_warn(dev, "Partner adapter not ready\n"); + } else if (rc) { + dev_warn(dev, "Error %d opening adapter\n", rc); + goto reg_crq_failed; + } + + retrc = 0; + + tasklet_setup(&adapter->tasklet, (void *)ibmvnic_tasklet); + + netdev_dbg(adapter->netdev, "registering irq 0x%x\n", vdev->irq); + snprintf(crq->name, sizeof(crq->name), "ibmvnic-%x", + adapter->vdev->unit_address); + rc = request_irq(vdev->irq, ibmvnic_interrupt, 0, crq->name, adapter); + if (rc) { + dev_err(dev, "Couldn't register irq 0x%x. 
rc=%d\n", + vdev->irq, rc); + goto req_irq_failed; + } + + rc = vio_enable_interrupts(vdev); + if (rc) { + dev_err(dev, "Error %d enabling interrupts\n", rc); + goto req_irq_failed; + } + + crq->cur = 0; + spin_lock_init(&crq->lock); + + /* process any CRQs that were queued before we enabled interrupts */ + tasklet_schedule(&adapter->tasklet); + + return retrc; + +req_irq_failed: + tasklet_kill(&adapter->tasklet); + do { + rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address); + } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); +reg_crq_failed: + dma_unmap_single(dev, crq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL); +map_failed: + free_page((unsigned long)crq->msgs); + crq->msgs = NULL; + return retrc; +} + +static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) +{ + struct device *dev = &adapter->vdev->dev; + unsigned long timeout = msecs_to_jiffies(20000); + u64 old_num_rx_queues = adapter->req_rx_queues; + u64 old_num_tx_queues = adapter->req_tx_queues; + int rc; + + adapter->from_passive_init = false; + + rc = ibmvnic_send_crq_init(adapter); + if (rc) { + dev_err(dev, "Send crq init failed with error %d\n", rc); + return rc; + } + + if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { + dev_err(dev, "Initialization sequence timed out\n"); + return -ETIMEDOUT; + } + + if (adapter->init_done_rc) { + release_crq_queue(adapter); + dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc); + return adapter->init_done_rc; + } + + if (adapter->from_passive_init) { + adapter->state = VNIC_OPEN; + adapter->from_passive_init = false; + dev_err(dev, "CRQ-init failed, passive-init\n"); + return -EINVAL; + } + + if (reset && + test_bit(0, &adapter->resetting) && !adapter->wait_for_reset && + adapter->reset_reason != VNIC_RESET_MOBILITY) { + if (adapter->req_rx_queues != old_num_rx_queues || + adapter->req_tx_queues != old_num_tx_queues) { + release_sub_crqs(adapter, 0); + rc = init_sub_crqs(adapter); + } else { + /* no need to reinitialize 
completely, but we do + * need to clean up transmits that were in flight + * when we processed the reset. Failure to do so + * will confound the upper layer, usually TCP, by + * creating the illusion of transmits that are + * awaiting completion. + */ + clean_tx_pools(adapter); + + rc = reset_sub_crq_queues(adapter); + } + } else { + rc = init_sub_crqs(adapter); + } + + if (rc) { + dev_err(dev, "Initialization of sub crqs failed\n"); + release_crq_queue(adapter); + return rc; + } + + rc = init_sub_crq_irqs(adapter); + if (rc) { + dev_err(dev, "Failed to initialize sub crq irqs\n"); + release_crq_queue(adapter); + } + + return rc; +} + +static struct device_attribute dev_attr_failover; + +static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) +{ + struct ibmvnic_adapter *adapter; + struct net_device *netdev; + unsigned char *mac_addr_p; + unsigned long flags; + bool init_success; + int rc; + + dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n", + dev->unit_address); + + mac_addr_p = (unsigned char *)vio_get_attribute(dev, + VETH_MAC_ADDR, NULL); + if (!mac_addr_p) { + dev_err(&dev->dev, + "(%s:%3.3d) ERROR: Can't find MAC_ADDR attribute\n", + __FILE__, __LINE__); + return 0; + } + + netdev = alloc_etherdev_mq(sizeof(struct ibmvnic_adapter), + IBMVNIC_MAX_QUEUES); + if (!netdev) + return -ENOMEM; + + adapter = netdev_priv(netdev); + adapter->state = VNIC_PROBING; + dev_set_drvdata(&dev->dev, netdev); + adapter->vdev = dev; + adapter->netdev = netdev; + adapter->login_pending = false; + memset(&adapter->map_ids, 0, sizeof(adapter->map_ids)); + /* map_ids start at 1, so ensure map_id 0 is always "in-use" */ + bitmap_set(adapter->map_ids, 0, 1); + + ether_addr_copy(adapter->mac_addr, mac_addr_p); + eth_hw_addr_set(netdev, adapter->mac_addr); + netdev->irq = dev->irq; + netdev->netdev_ops = &ibmvnic_netdev_ops; + netdev->ethtool_ops = &ibmvnic_ethtool_ops; + SET_NETDEV_DEV(netdev, &dev->dev); + + INIT_WORK(&adapter->ibmvnic_reset, 
__ibmvnic_reset); + INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset, + __ibmvnic_delayed_reset); + INIT_LIST_HEAD(&adapter->rwi_list); + spin_lock_init(&adapter->rwi_lock); + spin_lock_init(&adapter->state_lock); + mutex_init(&adapter->fw_lock); + init_completion(&adapter->probe_done); + init_completion(&adapter->init_done); + init_completion(&adapter->fw_done); + init_completion(&adapter->reset_done); + init_completion(&adapter->stats_done); + clear_bit(0, &adapter->resetting); + adapter->prev_rx_buf_sz = 0; + adapter->prev_mtu = 0; + + init_success = false; + do { + reinit_init_done(adapter); + + /* clear any failovers we got in the previous pass + * since we are reinitializing the CRQ + */ + adapter->failover_pending = false; + + /* If we had already initialized CRQ, we may have one or + * more resets queued already. Discard those and release + * the CRQ before initializing the CRQ again. + */ + release_crq_queue(adapter); + + /* Since we are still in PROBING state, __ibmvnic_reset() + * will not access the ->rwi_list and since we released CRQ, + * we won't get _new_ transport events. But there maybe an + * ongoing ibmvnic_reset() call. So serialize access to + * rwi_list. If we win the race, ibvmnic_reset() could add + * a reset after we purged but thats ok - we just may end + * up with an extra reset (i.e similar to having two or more + * resets in the queue at once). + * CHECK. + */ + spin_lock_irqsave(&adapter->rwi_lock, flags); + flush_reset_queue(adapter); + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + + rc = init_crq_queue(adapter); + if (rc) { + dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n", + rc); + goto ibmvnic_init_fail; + } + + rc = ibmvnic_reset_init(adapter, false); + } while (rc == -EAGAIN); + + /* We are ignoring the error from ibmvnic_reset_init() assuming that the + * partner is not ready. CRQ is not active. When the partner becomes + * ready, we will do the passive init reset. 
+ */ + + if (!rc) + init_success = true; + + rc = init_stats_buffers(adapter); + if (rc) + goto ibmvnic_init_fail; + + rc = init_stats_token(adapter); + if (rc) + goto ibmvnic_stats_fail; + + rc = device_create_file(&dev->dev, &dev_attr_failover); + if (rc) + goto ibmvnic_dev_file_err; + + netif_carrier_off(netdev); + + if (init_success) { + adapter->state = VNIC_PROBED; + netdev->mtu = adapter->req_mtu - ETH_HLEN; + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; + } else { + adapter->state = VNIC_DOWN; + } + + adapter->wait_for_reset = false; + adapter->last_reset_time = jiffies; + + rc = register_netdev(netdev); + if (rc) { + dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); + goto ibmvnic_register_fail; + } + dev_info(&dev->dev, "ibmvnic registered\n"); + + complete(&adapter->probe_done); + + return 0; + +ibmvnic_register_fail: + device_remove_file(&dev->dev, &dev_attr_failover); + +ibmvnic_dev_file_err: + release_stats_token(adapter); + +ibmvnic_stats_fail: + release_stats_buffers(adapter); + +ibmvnic_init_fail: + release_sub_crqs(adapter, 1); + release_crq_queue(adapter); + + /* cleanup worker thread after releasing CRQ so we don't get + * transport events (i.e new work items for the worker thread). + */ + adapter->state = VNIC_REMOVING; + complete(&adapter->probe_done); + flush_work(&adapter->ibmvnic_reset); + flush_delayed_work(&adapter->ibmvnic_delayed_reset); + + flush_reset_queue(adapter); + + mutex_destroy(&adapter->fw_lock); + free_netdev(netdev); + + return rc; +} + +static void ibmvnic_remove(struct vio_dev *dev) +{ + struct net_device *netdev = dev_get_drvdata(&dev->dev); + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + unsigned long flags; + + spin_lock_irqsave(&adapter->state_lock, flags); + + /* If ibmvnic_reset() is scheduling a reset, wait for it to + * finish. 
Then, set the state to REMOVING to prevent it from + * scheduling any more work and to have reset functions ignore + * any resets that have already been scheduled. Drop the lock + * after setting state, so __ibmvnic_reset() which is called + * from the flush_work() below, can make progress. + */ + spin_lock(&adapter->rwi_lock); + adapter->state = VNIC_REMOVING; + spin_unlock(&adapter->rwi_lock); + + spin_unlock_irqrestore(&adapter->state_lock, flags); + + flush_work(&adapter->ibmvnic_reset); + flush_delayed_work(&adapter->ibmvnic_delayed_reset); + + rtnl_lock(); + unregister_netdevice(netdev); + + release_resources(adapter); + release_rx_pools(adapter); + release_tx_pools(adapter); + release_sub_crqs(adapter, 1); + release_crq_queue(adapter); + + release_stats_token(adapter); + release_stats_buffers(adapter); + + adapter->state = VNIC_REMOVED; + + rtnl_unlock(); + mutex_destroy(&adapter->fw_lock); + device_remove_file(&dev->dev, &dev_attr_failover); + free_netdev(netdev); + dev_set_drvdata(&dev->dev, NULL); +} + +static ssize_t failover_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + __be64 session_token; + long rc; + + if (!sysfs_streq(buf, "1")) + return -EINVAL; + + rc = plpar_hcall(H_VIOCTL, retbuf, adapter->vdev->unit_address, + H_GET_SESSION_TOKEN, 0, 0, 0); + if (rc) { + netdev_err(netdev, "Couldn't retrieve session token, rc %ld\n", + rc); + goto last_resort; + } + + session_token = (__be64)retbuf[0]; + netdev_dbg(netdev, "Initiating client failover, session id %llx\n", + be64_to_cpu(session_token)); + rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, + H_SESSION_ERR_DETECTED, session_token, 0, 0); + if (rc) { + netdev_err(netdev, + "H_VIOCTL initiated failover failed, rc %ld\n", + rc); + goto last_resort; + } + + return count; + +last_resort: + 
netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n"); + ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + + return count; +} +static DEVICE_ATTR_WO(failover); + +static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev) +{ + struct net_device *netdev = dev_get_drvdata(&vdev->dev); + struct ibmvnic_adapter *adapter; + struct iommu_table *tbl; + unsigned long ret = 0; + int i; + + tbl = get_iommu_table_base(&vdev->dev); + + /* netdev inits at probe time along with the structures we need below*/ + if (!netdev) + return IOMMU_PAGE_ALIGN(IBMVNIC_IO_ENTITLEMENT_DEFAULT, tbl); + + adapter = netdev_priv(netdev); + + ret += PAGE_SIZE; /* the crq message queue */ + ret += IOMMU_PAGE_ALIGN(sizeof(struct ibmvnic_statistics), tbl); + + for (i = 0; i < adapter->req_tx_queues + adapter->req_rx_queues; i++) + ret += 4 * PAGE_SIZE; /* the scrq message queue */ + + for (i = 0; i < adapter->num_active_rx_pools; i++) + ret += adapter->rx_pool[i].size * + IOMMU_PAGE_ALIGN(adapter->rx_pool[i].buff_size, tbl); + + return ret; +} + +static int ibmvnic_resume(struct device *dev) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + if (adapter->state != VNIC_OPEN) + return 0; + + tasklet_schedule(&adapter->tasklet); + + return 0; +} + +static const struct vio_device_id ibmvnic_device_table[] = { + {"network", "IBM,vnic"}, + {"", "" } +}; +MODULE_DEVICE_TABLE(vio, ibmvnic_device_table); + +static const struct dev_pm_ops ibmvnic_pm_ops = { + .resume = ibmvnic_resume +}; + +static struct vio_driver ibmvnic_driver = { + .id_table = ibmvnic_device_table, + .probe = ibmvnic_probe, + .remove = ibmvnic_remove, + .get_desired_dma = ibmvnic_get_desired_dma, + .name = ibmvnic_driver_name, + .pm = &ibmvnic_pm_ops, +}; + +/* module functions */ +static int __init ibmvnic_module_init(void) +{ + pr_info("%s: %s %s\n", ibmvnic_driver_name, ibmvnic_driver_string, + IBMVNIC_DRIVER_VERSION); + + return 
vio_register_driver(&ibmvnic_driver); +} + +static void __exit ibmvnic_module_exit(void) +{ + vio_unregister_driver(&ibmvnic_driver); +} + +module_init(ibmvnic_module_init); +module_exit(ibmvnic_module_exit); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h new file mode 100644 index 000000000..e5c6ff3d0 --- /dev/null +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -0,0 +1,1062 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/**************************************************************************/ +/* */ +/* IBM System i and System p Virtual NIC Device Driver */ +/* Copyright (C) 2014 IBM Corp. */ +/* Santiago Leon (santi_leon@yahoo.com) */ +/* Thomas Falcon (tlfalcon@linux.vnet.ibm.com) */ +/* John Allen (jallen@linux.vnet.ibm.com) */ +/* */ +/* */ +/* This module contains the implementation of a virtual ethernet device */ +/* for use with IBM i/pSeries LPAR Linux. It utilizes the logical LAN */ +/* option of the RS/6000 Platform Architecture to interface with virtual */ +/* ethernet NICs that are presented to the partition by the hypervisor. */ +/* */ +/**************************************************************************/ + +#define IBMVNIC_NAME "ibmvnic" +#define IBMVNIC_DRIVER_VERSION "1.0.1" +#define IBMVNIC_INVALID_MAP -1 +#define IBMVNIC_OPEN_FAILED 3 + +/* basic structures plus 100 2k buffers */ +#define IBMVNIC_IO_ENTITLEMENT_DEFAULT 610305 + +/* Initial module_parameters */ +#define IBMVNIC_RX_WEIGHT 16 +/* when changing this, update IBMVNIC_IO_ENTITLEMENT_DEFAULT */ +#define IBMVNIC_BUFFS_PER_POOL 100 +#define IBMVNIC_MAX_QUEUES 16 +#define IBMVNIC_MAX_QUEUE_SZ 4096 +#define IBMVNIC_MAX_IND_DESCS 16 +#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) + +#define IBMVNIC_TSO_BUF_SZ 65536 +#define IBMVNIC_TSO_BUFS 64 +#define IBMVNIC_TSO_POOL_MASK 0x80000000 + +/* A VNIC adapter has set of Rx and Tx pools (aka queues). Each Rx/Tx pool + * has a set of buffers. 
The size of each buffer is determined by the MTU. + * + * Each Rx/Tx pool is also associated with a DMA region that is shared + * with the "hardware" (VIOS) and used to send/receive packets. The DMA + * region is also referred to as a Long Term Buffer or LTB. + * + * The size of the DMA region required for an Rx/Tx pool depends on the + * number and size (MTU) of the buffers in the pool. At the max levels + * of 4096 jumbo frames (MTU=9000) we will need about 9K*4K = 36MB plus + * some padding. + * + * But the size of a single DMA region is limited by MAX_ORDER in the + * kernel (about 16MB currently). To support, say, 4K jumbo frames, we + * use a set of LTBs (struct ibmvnic_ltb_set) per pool. + * + * IBMVNIC_ONE_LTB_MAX - max size of each LTB supported by kernel + * IBMVNIC_ONE_LTB_SIZE - current max size of each LTB in an ltb_set + * (must be <= IBMVNIC_ONE_LTB_MAX) + * IBMVNIC_LTB_SET_SIZE - current size of all LTBs in an ltb_set + * + * Each VNIC can have up to 16 Rx, 16 Tx and 16 TSO pools. The TSO pools + * are of fixed length (IBMVNIC_TSO_BUF_SZ * IBMVNIC_TSO_BUFS) of 4MB. + * + * The Rx and Tx pools can have up to 4096 buffers. The max size of these + * buffers is about 9588 bytes (for jumbo frames, including IBMVNIC_BUFFER_HLEN). + * So, setting the IBMVNIC_LTB_SET_SIZE for a pool to 4096 * 9588 ~= 38MB. + * + * There is a trade-off in setting IBMVNIC_ONE_LTB_SIZE. If it is large, + * the allocation of the LTB can fail when the system is low on memory. If + * it is too small, we would need several mappings for each of the Rx/ + * Tx/TSO pools but there is a limit of 255 mappings per vnic in the + * VNIC protocol. + * + * So setting IBMVNIC_ONE_LTB_SIZE to 8MB. With IBMVNIC_LTB_SET_SIZE set + * to 38MB, we will need 5 LTBs per Rx and Tx pool and 1 LTB per TSO + * pool for the 4MB. Thus the 16 Rx and Tx queues require 32 * 5 = 160 + * plus 16 for the TSO pools for a total of 176 LTB mappings per VNIC.
+ */ +#define IBMVNIC_ONE_LTB_MAX ((u32)((1 << (MAX_ORDER - 1)) * PAGE_SIZE)) +#define IBMVNIC_ONE_LTB_SIZE min((u32)(8 << 20), IBMVNIC_ONE_LTB_MAX) +#define IBMVNIC_LTB_SET_SIZE (38 << 20) + +#define IBMVNIC_BUFFER_HLEN 500 +#define IBMVNIC_RESET_DELAY 100 + +struct ibmvnic_login_buffer { + __be32 len; + __be32 version; +#define INITIAL_VERSION_LB 1 + __be32 num_txcomp_subcrqs; + __be32 off_txcomp_subcrqs; + __be32 num_rxcomp_subcrqs; + __be32 off_rxcomp_subcrqs; + __be32 login_rsp_ioba; + __be32 login_rsp_len; + __be32 client_data_offset; + __be32 client_data_len; +} __packed __aligned(8); + +struct ibmvnic_login_rsp_buffer { + __be32 len; + __be32 version; +#define INITIAL_VERSION_LRB 1 + __be32 num_txsubm_subcrqs; + __be32 off_txsubm_subcrqs; + __be32 num_rxadd_subcrqs; + __be32 off_rxadd_subcrqs; + __be32 off_rxadd_buff_size; + __be32 num_supp_tx_desc; + __be32 off_supp_tx_desc; +} __packed __aligned(8); + +struct ibmvnic_query_ip_offload_buffer { + __be32 len; + __be32 version; +#define INITIAL_VERSION_IOB 1 + u8 ipv4_chksum; + u8 ipv6_chksum; + u8 tcp_ipv4_chksum; + u8 tcp_ipv6_chksum; + u8 udp_ipv4_chksum; + u8 udp_ipv6_chksum; + u8 large_tx_ipv4; + u8 large_tx_ipv6; + u8 large_rx_ipv4; + u8 large_rx_ipv6; + u8 reserved1[14]; + __be16 max_ipv4_header_size; + __be16 max_ipv6_header_size; + __be16 max_tcp_header_size; + __be16 max_udp_header_size; + __be32 max_large_tx_size; + __be32 max_large_rx_size; + u8 reserved2[16]; + u8 ipv6_extension_header; +#define IPV6_EH_NOT_SUPPORTED 0x00 +#define IPV6_EH_SUPPORTED_LIM 0x01 +#define IPV6_EH_SUPPORTED 0xFF + u8 tcp_pseudosum_req; +#define TCP_PS_NOT_REQUIRED 0x00 +#define TCP_PS_REQUIRED 0x01 + u8 reserved3[30]; + __be16 num_ipv6_ext_headers; + __be32 off_ipv6_ext_headers; + u8 reserved4[154]; +} __packed __aligned(8); + +struct ibmvnic_control_ip_offload_buffer { + __be32 len; + __be32 version; +#define INITIAL_VERSION_IOB 1 + u8 ipv4_chksum; + u8 ipv6_chksum; + u8 tcp_ipv4_chksum; + u8 tcp_ipv6_chksum; + u8 
udp_ipv4_chksum; + u8 udp_ipv6_chksum; + u8 large_tx_ipv4; + u8 large_tx_ipv6; + u8 bad_packet_rx; + u8 large_rx_ipv4; + u8 large_rx_ipv6; + u8 reserved4[111]; +} __packed __aligned(8); + +struct ibmvnic_fw_component { + u8 name[48]; + __be32 trace_buff_size; + u8 correlator; + u8 trace_level; + u8 parent_correlator; + u8 error_check_level; + u8 trace_on; + u8 reserved[7]; + u8 description[192]; +} __packed __aligned(8); + +struct ibmvnic_fw_trace_entry { + __be32 trace_id; + u8 num_valid_data; + u8 reserved[3]; + __be64 pmc_registers; + __be64 timebase; + __be64 trace_data[5]; +} __packed __aligned(8); + +struct ibmvnic_statistics { + __be32 version; + __be32 promiscuous; + __be64 rx_packets; + __be64 rx_bytes; + __be64 tx_packets; + __be64 tx_bytes; + __be64 ucast_tx_packets; + __be64 ucast_rx_packets; + __be64 mcast_tx_packets; + __be64 mcast_rx_packets; + __be64 bcast_tx_packets; + __be64 bcast_rx_packets; + __be64 align_errors; + __be64 fcs_errors; + __be64 single_collision_frames; + __be64 multi_collision_frames; + __be64 sqe_test_errors; + __be64 deferred_tx; + __be64 late_collisions; + __be64 excess_collisions; + __be64 internal_mac_tx_errors; + __be64 carrier_sense; + __be64 too_long_frames; + __be64 internal_mac_rx_errors; + u8 reserved[72]; +} __packed __aligned(8); + +#define NUM_TX_STATS 3 +struct ibmvnic_tx_queue_stats { + u64 packets; + u64 bytes; + u64 dropped_packets; +}; + +#define NUM_RX_STATS 3 +struct ibmvnic_rx_queue_stats { + u64 packets; + u64 bytes; + u64 interrupts; +}; + +struct ibmvnic_acl_buffer { + __be32 len; + __be32 version; +#define INITIAL_VERSION_IOB 1 + u8 mac_acls_restrict; + u8 vlan_acls_restrict; + u8 reserved1[22]; + __be32 num_mac_addrs; + __be32 offset_mac_addrs; + __be32 num_vlan_ids; + __be32 offset_vlan_ids; + u8 reserved2[80]; +} __packed __aligned(8); + +/* descriptors have been changed, how should this be defined? 1? 4? */ + +#define IBMVNIC_TX_DESC_VERSIONS 3 + +/* is this still needed? 
*/ +struct ibmvnic_tx_comp_desc { + u8 first; + u8 num_comps; + __be16 rcs[5]; + __be32 correlators[5]; +} __packed __aligned(8); + +/* some flags that included in v0 descriptor, which is gone + * only used for IBMVNIC_TCP_CHKSUM and IBMVNIC_UDP_CHKSUM + * and only in some offload_flags variable that doesn't seem + * to be used anywhere, can probably be removed? + */ + +#define IBMVNIC_TCP_CHKSUM 0x20 +#define IBMVNIC_UDP_CHKSUM 0x08 + +struct ibmvnic_tx_desc { + u8 first; + u8 type; + +#define IBMVNIC_TX_DESC 0x10 + u8 n_crq_elem; + u8 n_sge; + u8 flags1; +#define IBMVNIC_TX_COMP_NEEDED 0x80 +#define IBMVNIC_TX_CHKSUM_OFFLOAD 0x40 +#define IBMVNIC_TX_LSO 0x20 +#define IBMVNIC_TX_PROT_TCP 0x10 +#define IBMVNIC_TX_PROT_UDP 0x08 +#define IBMVNIC_TX_PROT_IPV4 0x04 +#define IBMVNIC_TX_PROT_IPV6 0x02 +#define IBMVNIC_TX_VLAN_PRESENT 0x01 + u8 flags2; +#define IBMVNIC_TX_VLAN_INSERT 0x80 + __be16 mss; + u8 reserved[4]; + __be32 correlator; + __be16 vlan_id; + __be16 dma_reg; + __be32 sge_len; + __be64 ioba; +} __packed __aligned(8); + +struct ibmvnic_hdr_desc { + u8 first; + u8 type; +#define IBMVNIC_HDR_DESC 0x11 + u8 len; + u8 l2_len; + __be16 l3_len; + u8 l4_len; + u8 flag; + u8 data[24]; +} __packed __aligned(8); + +struct ibmvnic_hdr_ext_desc { + u8 first; + u8 type; +#define IBMVNIC_HDR_EXT_DESC 0x12 + u8 len; + u8 data[29]; +} __packed __aligned(8); + +struct ibmvnic_sge_desc { + u8 first; + u8 type; +#define IBMVNIC_SGE_DESC 0x30 + __be16 sge1_dma_reg; + __be32 sge1_len; + __be64 sge1_ioba; + __be16 reserved; + __be16 sge2_dma_reg; + __be32 sge2_len; + __be64 sge2_ioba; +} __packed __aligned(8); + +struct ibmvnic_rx_comp_desc { + u8 first; + u8 flags; +#define IBMVNIC_IP_CHKSUM_GOOD 0x80 +#define IBMVNIC_TCP_UDP_CHKSUM_GOOD 0x40 +#define IBMVNIC_END_FRAME 0x20 +#define IBMVNIC_EXACT_MC 0x10 +#define IBMVNIC_VLAN_STRIPPED 0x08 + __be16 off_frame_data; + __be32 len; + __be64 correlator; + __be16 vlan_tci; + __be16 rc; + u8 reserved[12]; +} __packed __aligned(8); + 
+struct ibmvnic_generic_scrq { + u8 first; + u8 reserved[31]; +} __packed __aligned(8); + +struct ibmvnic_rx_buff_add_desc { + u8 first; + u8 reserved[7]; + __be64 correlator; + __be32 ioba; + u8 map_id; + __be32 len:24; + u8 reserved2[8]; +} __packed __aligned(8); + +struct ibmvnic_rc { + u8 code; /* one of enum ibmvnic_rc_codes */ + u8 detailed_data[3]; +} __packed __aligned(4); + +struct ibmvnic_generic_crq { + u8 first; + u8 cmd; + u8 params[10]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_version_exchange { + u8 first; + u8 cmd; + __be16 version; +#define IBMVNIC_INITIAL_VERSION 1 + u8 reserved[8]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_capability { + u8 first; + u8 cmd; + __be16 capability; /* one of ibmvnic_capabilities */ + __be64 number; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_login { + u8 first; + u8 cmd; + u8 reserved[6]; + __be32 ioba; + __be32 len; +} __packed __aligned(8); + +struct ibmvnic_phys_parms { + u8 first; + u8 cmd; + u8 flags1; +#define IBMVNIC_EXTERNAL_LOOPBACK 0x80 +#define IBMVNIC_INTERNAL_LOOPBACK 0x40 +#define IBMVNIC_PROMISC 0x20 +#define IBMVNIC_PHYS_LINK_ACTIVE 0x10 +#define IBMVNIC_AUTONEG_DUPLEX 0x08 +#define IBMVNIC_FULL_DUPLEX 0x04 +#define IBMVNIC_HALF_DUPLEX 0x02 +#define IBMVNIC_CAN_CHG_PHYS_PARMS 0x01 + u8 flags2; +#define IBMVNIC_LOGICAL_LNK_ACTIVE 0x80 + __be32 speed; +#define IBMVNIC_AUTONEG 0x80000000 +#define IBMVNIC_10MBPS 0x40000000 +#define IBMVNIC_100MBPS 0x20000000 +#define IBMVNIC_1GBPS 0x10000000 +#define IBMVNIC_10GBPS 0x08000000 +#define IBMVNIC_40GBPS 0x04000000 +#define IBMVNIC_100GBPS 0x02000000 +#define IBMVNIC_25GBPS 0x01000000 +#define IBMVNIC_50GBPS 0x00800000 +#define IBMVNIC_200GBPS 0x00400000 + __be32 mtu; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_logical_link_state { + u8 first; + u8 cmd; + u8 link_state; +#define IBMVNIC_LOGICAL_LNK_DN 0x00 +#define IBMVNIC_LOGICAL_LNK_UP 0x01 +#define 
IBMVNIC_LOGICAL_LNK_QUERY 0xff + u8 reserved[9]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_query_ip_offload { + u8 first; + u8 cmd; + u8 reserved[2]; + __be32 len; + __be32 ioba; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_control_ip_offload { + u8 first; + u8 cmd; + u8 reserved[2]; + __be32 ioba; + __be32 len; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_request_statistics { + u8 first; + u8 cmd; + u8 flags; +#define IBMVNIC_PHYSICAL_PORT 0x80 + u8 reserved1; + __be32 ioba; + __be32 len; + u8 reserved[4]; +} __packed __aligned(8); + +struct ibmvnic_error_indication { + u8 first; + u8 cmd; + u8 flags; +#define IBMVNIC_FATAL_ERROR 0x80 + u8 reserved1; + __be32 error_id; + __be32 detail_error_sz; + __be16 error_cause; + u8 reserved2[2]; +} __packed __aligned(8); + +struct ibmvnic_link_state_indication { + u8 first; + u8 cmd; + u8 reserved1[2]; + u8 phys_link_state; + u8 logical_link_state; + u8 reserved2[10]; +} __packed __aligned(8); + +struct ibmvnic_change_mac_addr { + u8 first; + u8 cmd; + u8 mac_addr[6]; + u8 reserved[4]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_multicast_ctrl { + u8 first; + u8 cmd; + u8 mac_addr[6]; + u8 flags; +#define IBMVNIC_ENABLE_MC 0x80 +#define IBMVNIC_DISABLE_MC 0x40 +#define IBMVNIC_ENABLE_ALL 0x20 +#define IBMVNIC_DISABLE_ALL 0x10 + u8 reserved1; + __be16 reserved2; /* was num_enabled_mc_addr; */ + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_get_vpd_size { + u8 first; + u8 cmd; + u8 reserved[14]; +} __packed __aligned(8); + +struct ibmvnic_get_vpd_size_rsp { + u8 first; + u8 cmd; + u8 reserved[2]; + __be64 len; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_get_vpd { + u8 first; + u8 cmd; + u8 reserved1[2]; + __be32 ioba; + __be32 len; + u8 reserved[4]; +} __packed __aligned(8); + +struct ibmvnic_get_vpd_rsp { + u8 first; + u8 cmd; + u8 reserved[10]; + struct ibmvnic_rc rc; +} __packed 
__aligned(8); + +struct ibmvnic_acl_change_indication { + u8 first; + u8 cmd; + __be16 change_type; +#define IBMVNIC_MAC_ACL 0 +#define IBMVNIC_VLAN_ACL 1 + u8 reserved[12]; +} __packed __aligned(8); + +struct ibmvnic_acl_query { + u8 first; + u8 cmd; + u8 reserved1[2]; + __be32 ioba; + __be32 len; + u8 reserved2[4]; +} __packed __aligned(8); + +struct ibmvnic_tune { + u8 first; + u8 cmd; + u8 reserved1[2]; + __be32 ioba; + __be32 len; + u8 reserved2[4]; +} __packed __aligned(8); + +struct ibmvnic_request_map { + u8 first; + u8 cmd; + u8 reserved1; + u8 map_id; + __be32 ioba; + __be32 len; + u8 reserved2[4]; +} __packed __aligned(8); + +struct ibmvnic_request_map_rsp { + u8 first; + u8 cmd; + u8 reserved1; + u8 map_id; + u8 reserved2[8]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_request_unmap { + u8 first; + u8 cmd; + u8 reserved1; + u8 map_id; + u8 reserved2[12]; +} __packed __aligned(8); + +struct ibmvnic_request_unmap_rsp { + u8 first; + u8 cmd; + u8 reserved1; + u8 map_id; + u8 reserved2[8]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_query_map { + u8 first; + u8 cmd; + u8 reserved[14]; +} __packed __aligned(8); + +struct ibmvnic_query_map_rsp { + u8 first; + u8 cmd; + u8 reserved; + u8 page_size; + __be32 tot_pages; + __be32 free_pages; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +union ibmvnic_crq { + struct ibmvnic_generic_crq generic; + struct ibmvnic_version_exchange version_exchange; + struct ibmvnic_version_exchange version_exchange_rsp; + struct ibmvnic_capability query_capability; + struct ibmvnic_capability query_capability_rsp; + struct ibmvnic_capability request_capability; + struct ibmvnic_capability request_capability_rsp; + struct ibmvnic_login login; + struct ibmvnic_generic_crq login_rsp; + struct ibmvnic_phys_parms query_phys_parms; + struct ibmvnic_phys_parms query_phys_parms_rsp; + struct ibmvnic_phys_parms query_phys_capabilities; + struct ibmvnic_phys_parms 
query_phys_capabilities_rsp; + struct ibmvnic_phys_parms set_phys_parms; + struct ibmvnic_phys_parms set_phys_parms_rsp; + struct ibmvnic_logical_link_state logical_link_state; + struct ibmvnic_logical_link_state logical_link_state_rsp; + struct ibmvnic_query_ip_offload query_ip_offload; + struct ibmvnic_query_ip_offload query_ip_offload_rsp; + struct ibmvnic_control_ip_offload control_ip_offload; + struct ibmvnic_control_ip_offload control_ip_offload_rsp; + struct ibmvnic_request_statistics request_statistics; + struct ibmvnic_generic_crq request_statistics_rsp; + struct ibmvnic_error_indication error_indication; + struct ibmvnic_link_state_indication link_state_indication; + struct ibmvnic_change_mac_addr change_mac_addr; + struct ibmvnic_change_mac_addr change_mac_addr_rsp; + struct ibmvnic_multicast_ctrl multicast_ctrl; + struct ibmvnic_multicast_ctrl multicast_ctrl_rsp; + struct ibmvnic_get_vpd_size get_vpd_size; + struct ibmvnic_get_vpd_size_rsp get_vpd_size_rsp; + struct ibmvnic_get_vpd get_vpd; + struct ibmvnic_get_vpd_rsp get_vpd_rsp; + struct ibmvnic_acl_change_indication acl_change_indication; + struct ibmvnic_acl_query acl_query; + struct ibmvnic_generic_crq acl_query_rsp; + struct ibmvnic_tune tune; + struct ibmvnic_generic_crq tune_rsp; + struct ibmvnic_request_map request_map; + struct ibmvnic_request_map_rsp request_map_rsp; + struct ibmvnic_request_unmap request_unmap; + struct ibmvnic_request_unmap_rsp request_unmap_rsp; + struct ibmvnic_query_map query_map; + struct ibmvnic_query_map_rsp query_map_rsp; +}; + +enum ibmvnic_rc_codes { + SUCCESS = 0, + PARTIALSUCCESS = 1, + PERMISSION = 2, + NOMEMORY = 3, + PARAMETER = 4, + UNKNOWNCOMMAND = 5, + ABORTED = 6, + INVALIDSTATE = 7, + INVALIDIOBA = 8, + INVALIDLENGTH = 9, + UNSUPPORTEDOPTION = 10, +}; + +enum ibmvnic_capabilities { + MIN_TX_QUEUES = 1, + MIN_RX_QUEUES = 2, + MIN_RX_ADD_QUEUES = 3, + MAX_TX_QUEUES = 4, + MAX_RX_QUEUES = 5, + MAX_RX_ADD_QUEUES = 6, + REQ_TX_QUEUES = 7, + REQ_RX_QUEUES = 8, 
+ REQ_RX_ADD_QUEUES = 9, + MIN_TX_ENTRIES_PER_SUBCRQ = 10, + MIN_RX_ADD_ENTRIES_PER_SUBCRQ = 11, + MAX_TX_ENTRIES_PER_SUBCRQ = 12, + MAX_RX_ADD_ENTRIES_PER_SUBCRQ = 13, + REQ_TX_ENTRIES_PER_SUBCRQ = 14, + REQ_RX_ADD_ENTRIES_PER_SUBCRQ = 15, + TCP_IP_OFFLOAD = 16, + PROMISC_REQUESTED = 17, + PROMISC_SUPPORTED = 18, + MIN_MTU = 19, + MAX_MTU = 20, + REQ_MTU = 21, + MAX_MULTICAST_FILTERS = 22, + VLAN_HEADER_INSERTION = 23, + RX_VLAN_HEADER_INSERTION = 24, + MAX_TX_SG_ENTRIES = 25, + RX_SG_SUPPORTED = 26, + RX_SG_REQUESTED = 27, + OPT_TX_COMP_SUB_QUEUES = 28, + OPT_RX_COMP_QUEUES = 29, + OPT_RX_BUFADD_Q_PER_RX_COMP_Q = 30, + OPT_TX_ENTRIES_PER_SUBCRQ = 31, + OPT_RXBA_ENTRIES_PER_SUBCRQ = 32, + TX_RX_DESC_REQ = 33, +}; + +enum ibmvnic_error_cause { + ADAPTER_PROBLEM = 0, + BUS_PROBLEM = 1, + FW_PROBLEM = 2, + DD_PROBLEM = 3, + EEH_RECOVERY = 4, + FW_UPDATED = 5, + LOW_MEMORY = 6, +}; + +enum ibmvnic_commands { + VERSION_EXCHANGE = 0x01, + VERSION_EXCHANGE_RSP = 0x81, + QUERY_CAPABILITY = 0x02, + QUERY_CAPABILITY_RSP = 0x82, + REQUEST_CAPABILITY = 0x03, + REQUEST_CAPABILITY_RSP = 0x83, + LOGIN = 0x04, + LOGIN_RSP = 0x84, + QUERY_PHYS_PARMS = 0x05, + QUERY_PHYS_PARMS_RSP = 0x85, + QUERY_PHYS_CAPABILITIES = 0x06, + QUERY_PHYS_CAPABILITIES_RSP = 0x86, + SET_PHYS_PARMS = 0x07, + SET_PHYS_PARMS_RSP = 0x87, + ERROR_INDICATION = 0x08, + LOGICAL_LINK_STATE = 0x0C, + LOGICAL_LINK_STATE_RSP = 0x8C, + REQUEST_STATISTICS = 0x0D, + REQUEST_STATISTICS_RSP = 0x8D, + COLLECT_FW_TRACE = 0x11, + COLLECT_FW_TRACE_RSP = 0x91, + LINK_STATE_INDICATION = 0x12, + CHANGE_MAC_ADDR = 0x13, + CHANGE_MAC_ADDR_RSP = 0x93, + MULTICAST_CTRL = 0x14, + MULTICAST_CTRL_RSP = 0x94, + GET_VPD_SIZE = 0x15, + GET_VPD_SIZE_RSP = 0x95, + GET_VPD = 0x16, + GET_VPD_RSP = 0x96, + TUNE = 0x17, + TUNE_RSP = 0x97, + QUERY_IP_OFFLOAD = 0x18, + QUERY_IP_OFFLOAD_RSP = 0x98, + CONTROL_IP_OFFLOAD = 0x19, + CONTROL_IP_OFFLOAD_RSP = 0x99, + ACL_CHANGE_INDICATION = 0x1A, + ACL_QUERY = 0x1B, + ACL_QUERY_RSP = 0x9B, + QUERY_MAP 
= 0x1D, + QUERY_MAP_RSP = 0x9D, + REQUEST_MAP = 0x1E, + REQUEST_MAP_RSP = 0x9E, + REQUEST_UNMAP = 0x1F, + REQUEST_UNMAP_RSP = 0x9F, + VLAN_CTRL = 0x20, + VLAN_CTRL_RSP = 0xA0, +}; + +enum ibmvnic_crq_type { + IBMVNIC_CRQ_CMD = 0x80, + IBMVNIC_CRQ_CMD_RSP = 0x80, + IBMVNIC_CRQ_INIT_CMD = 0xC0, + IBMVNIC_CRQ_INIT_RSP = 0xC0, + IBMVNIC_CRQ_XPORT_EVENT = 0xFF, +}; + +enum ibmvfc_crq_format { + IBMVNIC_CRQ_INIT = 0x01, + IBMVNIC_CRQ_INIT_COMPLETE = 0x02, + IBMVNIC_PARTITION_MIGRATED = 0x06, + IBMVNIC_DEVICE_FAILOVER = 0x08, +}; + +struct ibmvnic_crq_queue { + union ibmvnic_crq *msgs; + int size, cur; + dma_addr_t msg_token; + /* Used for serialization of msgs, cur */ + spinlock_t lock; + bool active; + char name[32]; +}; + +union sub_crq { + struct ibmvnic_generic_scrq generic; + struct ibmvnic_tx_comp_desc tx_comp; + struct ibmvnic_tx_desc v1; + struct ibmvnic_hdr_desc hdr; + struct ibmvnic_hdr_ext_desc hdr_ext; + struct ibmvnic_sge_desc sge; + struct ibmvnic_rx_comp_desc rx_comp; + struct ibmvnic_rx_buff_add_desc rx_add; +}; + +struct ibmvnic_ind_xmit_queue { + union sub_crq *indir_arr; + dma_addr_t indir_dma; + int index; +}; + +struct ibmvnic_sub_crq_queue { + union sub_crq *msgs; + int size, cur; + dma_addr_t msg_token; + unsigned long crq_num; + unsigned long hw_irq; + unsigned int irq; + unsigned int pool_index; + int scrq_num; + /* Used for serialization of msgs, cur */ + spinlock_t lock; + struct sk_buff *rx_skb_top; + struct ibmvnic_adapter *adapter; + struct ibmvnic_ind_xmit_queue ind_buf; + atomic_t used; + char name[32]; + u64 handle; +} ____cacheline_aligned; + +struct ibmvnic_long_term_buff { + unsigned char *buff; + dma_addr_t addr; + u64 size; + u8 map_id; +}; + +struct ibmvnic_ltb_set { + int num_ltbs; + struct ibmvnic_long_term_buff *ltbs; +}; + +struct ibmvnic_tx_buff { + struct sk_buff *skb; + int index; + int pool_index; + int num_entries; +}; + +struct ibmvnic_tx_pool { + struct ibmvnic_tx_buff *tx_buff; + int *free_map; + int consumer_index; + 
int producer_index; + struct ibmvnic_ltb_set ltb_set; + int num_buffers; + int buf_size; +} ____cacheline_aligned; + +struct ibmvnic_rx_buff { + struct sk_buff *skb; + dma_addr_t dma; + unsigned char *data; + int size; + int pool_index; +}; + +struct ibmvnic_rx_pool { + struct ibmvnic_rx_buff *rx_buff; + int size; /* # of buffers in the pool */ + int index; + int buff_size; + atomic_t available; + int *free_map; + int next_free; + int next_alloc; + int active; + struct ibmvnic_ltb_set ltb_set; +} ____cacheline_aligned; + +struct ibmvnic_vpd { + unsigned char *buff; + dma_addr_t dma_addr; + u64 len; +}; + +enum vnic_state {VNIC_PROBING = 1, + VNIC_PROBED, + VNIC_OPENING, + VNIC_OPEN, + VNIC_CLOSING, + VNIC_CLOSED, + VNIC_REMOVING, + VNIC_REMOVED, + VNIC_DOWN}; + +enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1, + VNIC_RESET_MOBILITY, + VNIC_RESET_FATAL, + VNIC_RESET_NON_FATAL, + VNIC_RESET_TIMEOUT, + VNIC_RESET_CHANGE_PARAM, + VNIC_RESET_PASSIVE_INIT}; + +struct ibmvnic_rwi { + enum ibmvnic_reset_reason reset_reason; + struct list_head list; +}; + +struct ibmvnic_tunables { + u64 rx_queues; + u64 tx_queues; + u64 rx_entries; + u64 tx_entries; + u64 mtu; +}; + +struct ibmvnic_adapter { + struct vio_dev *vdev; + struct net_device *netdev; + struct ibmvnic_crq_queue crq; + u8 mac_addr[ETH_ALEN]; + struct ibmvnic_query_ip_offload_buffer ip_offload_buf; + dma_addr_t ip_offload_tok; + struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl; + dma_addr_t ip_offload_ctrl_tok; + u32 msg_enable; + + /* Vital Product Data (VPD) */ + struct ibmvnic_vpd *vpd; + char fw_version[32]; + + /* Statistics */ + struct ibmvnic_statistics stats; + dma_addr_t stats_token; + struct completion stats_done; + int replenish_no_mem; + int replenish_add_buff_success; + int replenish_add_buff_failure; + int replenish_task_cycles; + int tx_send_failed; + int tx_map_failed; + + struct ibmvnic_tx_queue_stats *tx_stats_buffers; + struct ibmvnic_rx_queue_stats *rx_stats_buffers; + + int 
phys_link_state; + int logical_link_state; + + u32 speed; + u8 duplex; + + /* login data */ + struct ibmvnic_login_buffer *login_buf; + dma_addr_t login_buf_token; + int login_buf_sz; + + struct ibmvnic_login_rsp_buffer *login_rsp_buf; + dma_addr_t login_rsp_buf_token; + int login_rsp_buf_sz; + + atomic_t running_cap_crqs; + + struct ibmvnic_sub_crq_queue **tx_scrq ____cacheline_aligned; + struct ibmvnic_sub_crq_queue **rx_scrq ____cacheline_aligned; + + /* rx structs */ + struct napi_struct *napi; + struct ibmvnic_rx_pool *rx_pool; + u64 promisc; + + struct ibmvnic_tx_pool *tx_pool; + struct ibmvnic_tx_pool *tso_pool; + struct completion probe_done; + struct completion init_done; + int init_done_rc; + + struct completion fw_done; + /* Used for serialization of device commands */ + struct mutex fw_lock; + int fw_done_rc; + + struct completion reset_done; + int reset_done_rc; + bool wait_for_reset; + + /* partner capabilities */ + u64 min_tx_queues; + u64 min_rx_queues; + u64 min_rx_add_queues; + u64 max_tx_queues; + u64 max_rx_queues; + u64 max_rx_add_queues; + u64 req_tx_queues; + u64 req_rx_queues; + u64 req_rx_add_queues; + u64 min_tx_entries_per_subcrq; + u64 min_rx_add_entries_per_subcrq; + u64 max_tx_entries_per_subcrq; + u64 max_rx_add_entries_per_subcrq; + u64 req_tx_entries_per_subcrq; + u64 req_rx_add_entries_per_subcrq; + u64 tcp_ip_offload; + u64 promisc_requested; + u64 promisc_supported; + u64 min_mtu; + u64 max_mtu; + u64 req_mtu; + u64 prev_mtu; + u64 max_multicast_filters; + u64 vlan_header_insertion; + u64 rx_vlan_header_insertion; + u64 max_tx_sg_entries; + u64 rx_sg_supported; + u64 rx_sg_requested; + u64 opt_tx_comp_sub_queues; + u64 opt_rx_comp_queues; + u64 opt_rx_bufadd_q_per_rx_comp_q; + u64 opt_tx_entries_per_subcrq; + u64 opt_rxba_entries_per_subcrq; + __be64 tx_rx_desc_req; +#define MAX_MAP_ID 255 + DECLARE_BITMAP(map_ids, MAX_MAP_ID); + u32 num_active_rx_scrqs; + u32 num_active_rx_pools; + u32 num_active_rx_napi; + u32 
num_active_tx_scrqs; + u32 num_active_tx_pools; + + u32 prev_rx_pool_size; + u32 prev_tx_pool_size; + u32 cur_rx_buf_sz; + u32 prev_rx_buf_sz; + + struct tasklet_struct tasklet; + enum vnic_state state; + /* Used for serialization of state field. When taking both state + * and rwi locks, take state lock first. + */ + spinlock_t state_lock; + enum ibmvnic_reset_reason reset_reason; + struct list_head rwi_list; + /* Used for serialization of rwi_list. When taking both state + * and rwi locks, take state lock first + */ + spinlock_t rwi_lock; + struct work_struct ibmvnic_reset; + struct delayed_work ibmvnic_delayed_reset; + unsigned long resetting; + /* last device reset time */ + unsigned long last_reset_time; + + bool napi_enabled; + bool from_passive_init; + bool login_pending; + /* protected by rcu */ + bool tx_queues_active; + bool failover_pending; + bool force_reset_recovery; + + struct ibmvnic_tunables desired; + struct ibmvnic_tunables fallback; +}; |