summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/cavium/thunder
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 01:02:30 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 01:02:30 +0000
commit76cb841cb886eef6b3bee341a2266c76578724ad (patch)
treef5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /drivers/net/ethernet/cavium/thunder
parentInitial commit. (diff)
downloadlinux-upstream.tar.xz
linux-upstream.zip
Adding upstream version 4.19.249.upstream/4.19.249upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--drivers/net/ethernet/cavium/thunder/Makefile13
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic.h639
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic_main.c1483
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic_reg.h233
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c885
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c2310
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.c1976
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.h377
-rw-r--r--drivers/net/ethernet/cavium/thunder/q_struct.h699
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.c1731
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.h266
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_xcv.c234
12 files changed, 10846 insertions, 0 deletions
diff --git a/drivers/net/ethernet/cavium/thunder/Makefile b/drivers/net/ethernet/cavium/thunder/Makefile
new file mode 100644
index 000000000..2fc6142d1
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Cavium's Thunder ethernet device
+#
+
+obj-$(CONFIG_THUNDER_NIC_RGX) += thunder_xcv.o
+obj-$(CONFIG_THUNDER_NIC_BGX) += thunder_bgx.o
+obj-$(CONFIG_THUNDER_NIC_PF) += nicpf.o
+obj-$(CONFIG_THUNDER_NIC_VF) += nicvf.o
+
+nicpf-y := nic_main.o
+nicvf-y := nicvf_main.o nicvf_queues.o
+nicvf-y += nicvf_ethtool.o
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
new file mode 100644
index 000000000..f4d817652
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -0,0 +1,639 @@
+/*
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef NIC_H
+#define NIC_H
+
+#include <linux/netdevice.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include "thunder_bgx.h"
+
+/* PCI device IDs */
+#define PCI_DEVICE_ID_THUNDER_NIC_PF 0xA01E
+#define PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF 0x0011
+#define PCI_DEVICE_ID_THUNDER_NIC_VF 0xA034
+#define PCI_DEVICE_ID_THUNDER_BGX 0xA026
+
+/* Subsystem device IDs */
+#define PCI_SUBSYS_DEVID_88XX_NIC_PF 0xA11E
+#define PCI_SUBSYS_DEVID_81XX_NIC_PF 0xA21E
+#define PCI_SUBSYS_DEVID_83XX_NIC_PF 0xA31E
+
+#define PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF 0xA11E
+#define PCI_SUBSYS_DEVID_88XX_NIC_VF 0xA134
+#define PCI_SUBSYS_DEVID_81XX_NIC_VF 0xA234
+#define PCI_SUBSYS_DEVID_83XX_NIC_VF 0xA334
+
+
+/* PCI BAR nos */
+#define PCI_CFG_REG_BAR_NUM 0
+#define PCI_MSIX_REG_BAR_NUM 4
+
+/* NIC SRIOV VF count */
+#define MAX_NUM_VFS_SUPPORTED 128
+#define DEFAULT_NUM_VF_ENABLED 8
+
+#define NIC_TNS_BYPASS_MODE 0
+#define NIC_TNS_MODE 1
+
+/* NIC priv flags */
+#define NIC_SRIOV_ENABLED BIT(0)
+
+/* Min/Max packet size */
+#define NIC_HW_MIN_FRS 64
+#define NIC_HW_MAX_FRS 9190 /* Excluding L2 header and FCS */
+
+/* Max pkinds */
+#define NIC_MAX_PKIND 16
+
+/* Max when CPI_ALG is IP diffserv */
+#define NIC_MAX_CPI_PER_LMAC 64
+
+/* NIC VF Interrupts */
+#define NICVF_INTR_CQ 0
+#define NICVF_INTR_SQ 1
+#define NICVF_INTR_RBDR 2
+#define NICVF_INTR_PKT_DROP 3
+#define NICVF_INTR_TCP_TIMER 4
+#define NICVF_INTR_MBOX 5
+#define NICVF_INTR_QS_ERR 6
+
+#define NICVF_INTR_CQ_SHIFT 0
+#define NICVF_INTR_SQ_SHIFT 8
+#define NICVF_INTR_RBDR_SHIFT 16
+#define NICVF_INTR_PKT_DROP_SHIFT 20
+#define NICVF_INTR_TCP_TIMER_SHIFT 21
+#define NICVF_INTR_MBOX_SHIFT 22
+#define NICVF_INTR_QS_ERR_SHIFT 23
+
+#define NICVF_INTR_CQ_MASK (0xFF << NICVF_INTR_CQ_SHIFT)
+#define NICVF_INTR_SQ_MASK (0xFF << NICVF_INTR_SQ_SHIFT)
+#define NICVF_INTR_RBDR_MASK (0x03 << NICVF_INTR_RBDR_SHIFT)
+#define NICVF_INTR_PKT_DROP_MASK BIT(NICVF_INTR_PKT_DROP_SHIFT)
+#define NICVF_INTR_TCP_TIMER_MASK BIT(NICVF_INTR_TCP_TIMER_SHIFT)
+#define NICVF_INTR_MBOX_MASK BIT(NICVF_INTR_MBOX_SHIFT)
+#define NICVF_INTR_QS_ERR_MASK BIT(NICVF_INTR_QS_ERR_SHIFT)
+
+/* MSI-X interrupts */
+#define NIC_PF_MSIX_VECTORS 10
+#define NIC_VF_MSIX_VECTORS 20
+
+#define NIC_PF_INTR_ID_ECC0_SBE 0
+#define NIC_PF_INTR_ID_ECC0_DBE 1
+#define NIC_PF_INTR_ID_ECC1_SBE 2
+#define NIC_PF_INTR_ID_ECC1_DBE 3
+#define NIC_PF_INTR_ID_ECC2_SBE 4
+#define NIC_PF_INTR_ID_ECC2_DBE 5
+#define NIC_PF_INTR_ID_ECC3_SBE 6
+#define NIC_PF_INTR_ID_ECC3_DBE 7
+#define NIC_PF_INTR_ID_MBOX0 8
+#define NIC_PF_INTR_ID_MBOX1 9
+
+/* Minimum FIFO level before all packets for the CQ are dropped
+ *
+ * This value ensures that once a packet has been "accepted"
+ * for reception it will not get dropped due to non-availability
+ * of CQ descriptor. An errata in HW mandates this value to be
+ * atleast 0x100.
+ */
+#define NICPF_CQM_MIN_DROP_LEVEL 0x100
+
+/* Global timer for CQ timer thresh interrupts
+ * Calculated for SCLK of 700Mhz
+ * value written should be a 1/16th of what is expected
+ *
+ * 1 tick per 0.025usec
+ */
+#define NICPF_CLK_PER_INT_TICK 1
+
+/* Time to wait before we decide that a SQ is stuck.
+ *
+ * Since both pkt rx and tx notifications are done with same CQ,
+ * when packets are being received at very high rate (eg: L2 forwarding)
+ * then freeing transmitted skbs will be delayed and watchdog
+ * will kick in, resetting interface. Hence keeping this value high.
+ */
+#define NICVF_TX_TIMEOUT (50 * HZ)
+
+struct nicvf_cq_poll {
+ struct nicvf *nicvf;
+ u8 cq_idx; /* Completion queue index */
+ struct napi_struct napi;
+};
+
+#define NIC_MAX_RSS_HASH_BITS 8
+#define NIC_MAX_RSS_IDR_TBL_SIZE (1 << NIC_MAX_RSS_HASH_BITS)
+#define RSS_HASH_KEY_SIZE 5 /* 320 bit key */
+
+struct nicvf_rss_info {
+ bool enable;
+#define RSS_L2_EXTENDED_HASH_ENA BIT(0)
+#define RSS_IP_HASH_ENA BIT(1)
+#define RSS_TCP_HASH_ENA BIT(2)
+#define RSS_TCP_SYN_DIS BIT(3)
+#define RSS_UDP_HASH_ENA BIT(4)
+#define RSS_L4_EXTENDED_HASH_ENA BIT(5)
+#define RSS_ROCE_ENA BIT(6)
+#define RSS_L3_BI_DIRECTION_ENA BIT(7)
+#define RSS_L4_BI_DIRECTION_ENA BIT(8)
+ u64 cfg;
+ u8 hash_bits;
+ u16 rss_size;
+ u8 ind_tbl[NIC_MAX_RSS_IDR_TBL_SIZE];
+ u64 key[RSS_HASH_KEY_SIZE];
+} ____cacheline_aligned_in_smp;
+
+struct nicvf_pfc {
+ u8 autoneg;
+ u8 fc_rx;
+ u8 fc_tx;
+};
+
+enum rx_stats_reg_offset {
+ RX_OCTS = 0x0,
+ RX_UCAST = 0x1,
+ RX_BCAST = 0x2,
+ RX_MCAST = 0x3,
+ RX_RED = 0x4,
+ RX_RED_OCTS = 0x5,
+ RX_ORUN = 0x6,
+ RX_ORUN_OCTS = 0x7,
+ RX_FCS = 0x8,
+ RX_L2ERR = 0x9,
+ RX_DRP_BCAST = 0xa,
+ RX_DRP_MCAST = 0xb,
+ RX_DRP_L3BCAST = 0xc,
+ RX_DRP_L3MCAST = 0xd,
+ RX_STATS_ENUM_LAST,
+};
+
+enum tx_stats_reg_offset {
+ TX_OCTS = 0x0,
+ TX_UCAST = 0x1,
+ TX_BCAST = 0x2,
+ TX_MCAST = 0x3,
+ TX_DROP = 0x4,
+ TX_STATS_ENUM_LAST,
+};
+
+struct nicvf_hw_stats {
+ u64 rx_bytes;
+ u64 rx_frames;
+ u64 rx_ucast_frames;
+ u64 rx_bcast_frames;
+ u64 rx_mcast_frames;
+ u64 rx_drops;
+ u64 rx_drop_red;
+ u64 rx_drop_red_bytes;
+ u64 rx_drop_overrun;
+ u64 rx_drop_overrun_bytes;
+ u64 rx_drop_bcast;
+ u64 rx_drop_mcast;
+ u64 rx_drop_l3_bcast;
+ u64 rx_drop_l3_mcast;
+ u64 rx_fcs_errors;
+ u64 rx_l2_errors;
+
+ u64 tx_bytes;
+ u64 tx_frames;
+ u64 tx_ucast_frames;
+ u64 tx_bcast_frames;
+ u64 tx_mcast_frames;
+ u64 tx_drops;
+};
+
+struct nicvf_drv_stats {
+ /* CQE Rx errs */
+ u64 rx_bgx_truncated_pkts;
+ u64 rx_jabber_errs;
+ u64 rx_fcs_errs;
+ u64 rx_bgx_errs;
+ u64 rx_prel2_errs;
+ u64 rx_l2_hdr_malformed;
+ u64 rx_oversize;
+ u64 rx_undersize;
+ u64 rx_l2_len_mismatch;
+ u64 rx_l2_pclp;
+ u64 rx_ip_ver_errs;
+ u64 rx_ip_csum_errs;
+ u64 rx_ip_hdr_malformed;
+ u64 rx_ip_payload_malformed;
+ u64 rx_ip_ttl_errs;
+ u64 rx_l3_pclp;
+ u64 rx_l4_malformed;
+ u64 rx_l4_csum_errs;
+ u64 rx_udp_len_errs;
+ u64 rx_l4_port_errs;
+ u64 rx_tcp_flag_errs;
+ u64 rx_tcp_offset_errs;
+ u64 rx_l4_pclp;
+ u64 rx_truncated_pkts;
+
+ /* CQE Tx errs */
+ u64 tx_desc_fault;
+ u64 tx_hdr_cons_err;
+ u64 tx_subdesc_err;
+ u64 tx_max_size_exceeded;
+ u64 tx_imm_size_oflow;
+ u64 tx_data_seq_err;
+ u64 tx_mem_seq_err;
+ u64 tx_lock_viol;
+ u64 tx_data_fault;
+ u64 tx_tstmp_conflict;
+ u64 tx_tstmp_timeout;
+ u64 tx_mem_fault;
+ u64 tx_csum_overlap;
+ u64 tx_csum_overflow;
+
+ /* driver debug stats */
+ u64 tx_tso;
+ u64 tx_timeout;
+ u64 txq_stop;
+ u64 txq_wake;
+
+ u64 rcv_buffer_alloc_failures;
+ u64 page_alloc;
+
+ struct u64_stats_sync syncp;
+};
+
+struct cavium_ptp;
+
+struct xcast_addr_list {
+ int count;
+ u64 mc[];
+};
+
+struct nicvf_work {
+ struct delayed_work work;
+ u8 mode;
+ struct xcast_addr_list *mc;
+};
+
+struct nicvf {
+ struct nicvf *pnicvf;
+ struct net_device *netdev;
+ struct pci_dev *pdev;
+ void __iomem *reg_base;
+ struct bpf_prog *xdp_prog;
+#define MAX_QUEUES_PER_QSET 8
+ struct queue_set *qs;
+ void *iommu_domain;
+ u8 vf_id;
+ u8 sqs_id;
+ bool sqs_mode;
+ bool hw_tso;
+ bool t88;
+
+ /* Receive buffer alloc */
+ u32 rb_page_offset;
+ u16 rb_pageref;
+ bool rb_alloc_fail;
+ bool rb_work_scheduled;
+ struct page *rb_page;
+ struct delayed_work rbdr_work;
+ struct tasklet_struct rbdr_task;
+
+ /* Secondary Qset */
+ u8 sqs_count;
+#define MAX_SQS_PER_VF_SINGLE_NODE 5
+#define MAX_SQS_PER_VF 11
+ struct nicvf *snicvf[MAX_SQS_PER_VF];
+
+ /* Queue count */
+ u8 rx_queues;
+ u8 tx_queues;
+ u8 xdp_tx_queues;
+ u8 max_queues;
+
+ u8 node;
+ u8 cpi_alg;
+ bool link_up;
+ u8 mac_type;
+ u8 duplex;
+ u32 speed;
+ bool tns_mode;
+ bool loopback_supported;
+ struct nicvf_rss_info rss_info;
+ struct nicvf_pfc pfc;
+ struct tasklet_struct qs_err_task;
+ struct work_struct reset_task;
+ struct nicvf_work rx_mode_work;
+ /* spinlock to protect workqueue arguments from concurrent access */
+ spinlock_t rx_mode_wq_lock;
+
+ /* PTP timestamp */
+ struct cavium_ptp *ptp_clock;
+ /* Inbound timestamping is on */
+ bool hw_rx_tstamp;
+ /* When the packet that requires timestamping is sent, hardware inserts
+ * two entries to the completion queue. First is the regular
+ * CQE_TYPE_SEND entry that signals that the packet was sent.
+ * The second is CQE_TYPE_SEND_PTP that contains the actual timestamp
+ * for that packet.
+ * `ptp_skb` is initialized in the handler for the CQE_TYPE_SEND
+ * entry and is used and zeroed in the handler for the CQE_TYPE_SEND_PTP
+ * entry.
+ * So `ptp_skb` is used to hold the pointer to the packet between
+ * the calls to CQE_TYPE_SEND and CQE_TYPE_SEND_PTP handlers.
+ */
+ struct sk_buff *ptp_skb;
+ /* `tx_ptp_skbs` is set when the hardware is sending a packet that
+ * requires timestamping. Cavium hardware can not process more than one
+ * such packet at once so this is set each time the driver submits
+ * a packet that requires timestamping to the send queue and clears
+ * each time it receives the entry on the completion queue saying
+ * that such packet was sent.
+ * So `tx_ptp_skbs` prevents driver from submitting more than one
+ * packet that requires timestamping to the hardware for transmitting.
+ */
+ atomic_t tx_ptp_skbs;
+
+ /* Interrupt coalescing settings */
+ u32 cq_coalesce_usecs;
+ u32 msg_enable;
+
+ /* Stats */
+ struct nicvf_hw_stats hw_stats;
+ struct nicvf_drv_stats __percpu *drv_stats;
+ struct bgx_stats bgx_stats;
+
+ /* Napi */
+ struct nicvf_cq_poll *napi[8];
+
+ /* MSI-X */
+ u8 num_vec;
+ char irq_name[NIC_VF_MSIX_VECTORS][IFNAMSIZ + 15];
+ bool irq_allocated[NIC_VF_MSIX_VECTORS];
+ cpumask_var_t affinity_mask[NIC_VF_MSIX_VECTORS];
+
+ /* VF <-> PF mailbox communication */
+ bool pf_acked;
+ bool pf_nacked;
+ bool set_mac_pending;
+} ____cacheline_aligned_in_smp;
+
+/* PF <--> VF Mailbox communication
+ * Eight 64bit registers are shared between PF and VF.
+ * Separate set for each VF.
+ * Writing '1' into last register mbx7 means end of message.
+ */
+
+/* PF <--> VF mailbox communication */
+#define NIC_PF_VF_MAILBOX_SIZE 2
+#define NIC_MBOX_MSG_TIMEOUT 2000 /* ms */
+
+/* Mailbox message types */
+#define NIC_MBOX_MSG_READY 0x01 /* Is PF ready to rcv msgs */
+#define NIC_MBOX_MSG_ACK 0x02 /* ACK the message received */
+#define NIC_MBOX_MSG_NACK 0x03 /* NACK the message received */
+#define NIC_MBOX_MSG_QS_CFG 0x04 /* Configure Qset */
+#define NIC_MBOX_MSG_RQ_CFG 0x05 /* Configure receive queue */
+#define NIC_MBOX_MSG_SQ_CFG 0x06 /* Configure Send queue */
+#define NIC_MBOX_MSG_RQ_DROP_CFG 0x07 /* Configure receive queue */
+#define NIC_MBOX_MSG_SET_MAC 0x08 /* Add MAC ID to DMAC filter */
+#define NIC_MBOX_MSG_SET_MAX_FRS 0x09 /* Set max frame size */
+#define NIC_MBOX_MSG_CPI_CFG 0x0A /* Config CPI, RSSI */
+#define NIC_MBOX_MSG_RSS_SIZE 0x0B /* Get RSS indir_tbl size */
+#define NIC_MBOX_MSG_RSS_CFG 0x0C /* Config RSS table */
+#define NIC_MBOX_MSG_RSS_CFG_CONT 0x0D /* RSS config continuation */
+#define NIC_MBOX_MSG_RQ_BP_CFG 0x0E /* RQ backpressure config */
+#define NIC_MBOX_MSG_RQ_SW_SYNC 0x0F /* Flush inflight pkts to RQ */
+#define NIC_MBOX_MSG_BGX_STATS 0x10 /* Get stats from BGX */
+#define NIC_MBOX_MSG_BGX_LINK_CHANGE 0x11 /* BGX:LMAC link status */
+#define NIC_MBOX_MSG_ALLOC_SQS 0x12 /* Allocate secondary Qset */
+#define NIC_MBOX_MSG_NICVF_PTR 0x13 /* Send nicvf ptr to PF */
+#define NIC_MBOX_MSG_PNICVF_PTR 0x14 /* Get primary qset nicvf ptr */
+#define NIC_MBOX_MSG_SNICVF_PTR 0x15 /* Send sqet nicvf ptr to PVF */
+#define NIC_MBOX_MSG_LOOPBACK 0x16 /* Set interface in loopback */
+#define NIC_MBOX_MSG_RESET_STAT_COUNTER 0x17 /* Reset statistics counters */
+#define NIC_MBOX_MSG_PFC 0x18 /* Pause frame control */
+#define NIC_MBOX_MSG_PTP_CFG 0x19 /* HW packet timestamp */
+#define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */
+#define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */
+#define NIC_MBOX_MSG_RESET_XCAST 0xF2 /* Reset DCAM filtering mode */
+#define NIC_MBOX_MSG_ADD_MCAST 0xF3 /* Add MAC to DCAM filters */
+#define NIC_MBOX_MSG_SET_XCAST 0xF4 /* Set MCAST/BCAST RX mode */
+
+struct nic_cfg_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 node_id;
+ u8 tns_mode:1;
+ u8 sqs_mode:1;
+ u8 loopback_supported:1;
+ u8 mac_addr[ETH_ALEN];
+};
+
+/* Qset configuration */
+struct qs_cfg_msg {
+ u8 msg;
+ u8 num;
+ u8 sqs_count;
+ u64 cfg;
+};
+
+/* Receive queue configuration */
+struct rq_cfg_msg {
+ u8 msg;
+ u8 qs_num;
+ u8 rq_num;
+ u64 cfg;
+};
+
+/* Send queue configuration */
+struct sq_cfg_msg {
+ u8 msg;
+ u8 qs_num;
+ u8 sq_num;
+ bool sqs_mode;
+ u64 cfg;
+};
+
+/* Set VF's MAC address */
+struct set_mac_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 mac_addr[ETH_ALEN];
+};
+
+/* Set Maximum frame size */
+struct set_frs_msg {
+ u8 msg;
+ u8 vf_id;
+ u16 max_frs;
+};
+
+/* Set CPI algorithm type */
+struct cpi_cfg_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 rq_cnt;
+ u8 cpi_alg;
+};
+
+/* Get RSS table size */
+struct rss_sz_msg {
+ u8 msg;
+ u8 vf_id;
+ u16 ind_tbl_size;
+};
+
+/* Set RSS configuration */
+struct rss_cfg_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 hash_bits;
+ u8 tbl_len;
+ u8 tbl_offset;
+#define RSS_IND_TBL_LEN_PER_MBX_MSG 8
+ u8 ind_tbl[RSS_IND_TBL_LEN_PER_MBX_MSG];
+};
+
+struct bgx_stats_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 rx;
+ u8 idx;
+ u64 stats;
+};
+
+/* Physical interface link status */
+struct bgx_link_status {
+ u8 msg;
+ u8 mac_type;
+ u8 link_up;
+ u8 duplex;
+ u32 speed;
+};
+
+/* Get Extra Qset IDs */
+struct sqs_alloc {
+ u8 msg;
+ u8 vf_id;
+ u8 qs_count;
+};
+
+struct nicvf_ptr {
+ u8 msg;
+ u8 vf_id;
+ bool sqs_mode;
+ u8 sqs_id;
+ u64 nicvf;
+};
+
+/* Set interface in loopback mode */
+struct set_loopback {
+ u8 msg;
+ u8 vf_id;
+ bool enable;
+};
+
+/* Reset statistics counters */
+struct reset_stat_cfg {
+ u8 msg;
+ /* Bitmap to select NIC_PF_VNIC(vf_id)_RX_STAT(0..13) */
+ u16 rx_stat_mask;
+ /* Bitmap to select NIC_PF_VNIC(vf_id)_TX_STAT(0..4) */
+ u8 tx_stat_mask;
+ /* Bitmap to select NIC_PF_QS(0..127)_RQ(0..7)_STAT(0..1)
+ * bit14, bit15 NIC_PF_QS(vf_id)_RQ7_STAT(0..1)
+ * bit12, bit13 NIC_PF_QS(vf_id)_RQ6_STAT(0..1)
+ * ..
+ * bit2, bit3 NIC_PF_QS(vf_id)_RQ1_STAT(0..1)
+ * bit0, bit1 NIC_PF_QS(vf_id)_RQ0_STAT(0..1)
+ */
+ u16 rq_stat_mask;
+ /* Bitmap to select NIC_PF_QS(0..127)_SQ(0..7)_STAT(0..1)
+ * bit14, bit15 NIC_PF_QS(vf_id)_SQ7_STAT(0..1)
+ * bit12, bit13 NIC_PF_QS(vf_id)_SQ6_STAT(0..1)
+ * ..
+ * bit2, bit3 NIC_PF_QS(vf_id)_SQ1_STAT(0..1)
+ * bit0, bit1 NIC_PF_QS(vf_id)_SQ0_STAT(0..1)
+ */
+ u16 sq_stat_mask;
+};
+
+struct pfc {
+ u8 msg;
+ u8 get; /* Get or set PFC settings */
+ u8 autoneg;
+ u8 fc_rx;
+ u8 fc_tx;
+};
+
+struct set_ptp {
+ u8 msg;
+ bool enable;
+};
+
+struct xcast {
+ u8 msg;
+ union {
+ u8 mode;
+ u64 mac;
+ } data;
+};
+
+/* 128 bit shared memory between PF and each VF */
+union nic_mbx {
+ struct { u8 msg; } msg;
+ struct nic_cfg_msg nic_cfg;
+ struct qs_cfg_msg qs;
+ struct rq_cfg_msg rq;
+ struct sq_cfg_msg sq;
+ struct set_mac_msg mac;
+ struct set_frs_msg frs;
+ struct cpi_cfg_msg cpi_cfg;
+ struct rss_sz_msg rss_size;
+ struct rss_cfg_msg rss_cfg;
+ struct bgx_stats_msg bgx_stats;
+ struct bgx_link_status link_status;
+ struct sqs_alloc sqs_alloc;
+ struct nicvf_ptr nicvf;
+ struct set_loopback lbk;
+ struct reset_stat_cfg reset_stat;
+ struct pfc pfc;
+ struct set_ptp ptp;
+ struct xcast xcast;
+};
+
+#define NIC_NODE_ID_MASK 0x03
+#define NIC_NODE_ID_SHIFT 44
+
+static inline int nic_get_node_id(struct pci_dev *pdev)
+{
+ u64 addr = pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM);
+ return ((addr >> NIC_NODE_ID_SHIFT) & NIC_NODE_ID_MASK);
+}
+
+static inline bool pass1_silicon(struct pci_dev *pdev)
+{
+ return (pdev->revision < 8) &&
+ (pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF);
+}
+
+static inline bool pass2_silicon(struct pci_dev *pdev)
+{
+ return (pdev->revision >= 8) &&
+ (pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF);
+}
+
+int nicvf_set_real_num_queues(struct net_device *netdev,
+ int tx_queues, int rx_queues);
+int nicvf_open(struct net_device *netdev);
+int nicvf_stop(struct net_device *netdev);
+int nicvf_send_msg_to_pf(struct nicvf *vf, union nic_mbx *mbx);
+void nicvf_config_rss(struct nicvf *nic);
+void nicvf_set_rss_key(struct nicvf *nic);
+void nicvf_set_ethtool_ops(struct net_device *netdev);
+void nicvf_update_stats(struct nicvf *nic);
+void nicvf_update_lmac_stats(struct nicvf *nic);
+
+#endif /* NIC_H */
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
new file mode 100644
index 000000000..7c0a67f1f
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -0,0 +1,1483 @@
+/*
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/etherdevice.h>
+#include <linux/of.h>
+#include <linux/if_vlan.h>
+
+#include "nic_reg.h"
+#include "nic.h"
+#include "q_struct.h"
+#include "thunder_bgx.h"
+
+#define DRV_NAME "nicpf"
+#define DRV_VERSION "1.0"
+
+#define NIC_VF_PER_MBX_REG 64
+
+struct hw_info {
+ u8 bgx_cnt;
+ u8 chans_per_lmac;
+ u8 chans_per_bgx; /* Rx/Tx chans */
+ u8 chans_per_rgx;
+ u8 chans_per_lbk;
+ u16 cpi_cnt;
+ u16 rssi_cnt;
+ u16 rss_ind_tbl_size;
+ u16 tl4_cnt;
+ u16 tl3_cnt;
+ u8 tl2_cnt;
+ u8 tl1_cnt;
+ bool tl1_per_bgx; /* TL1 per BGX or per LMAC */
+};
+
+struct nicpf {
+ struct pci_dev *pdev;
+ struct hw_info *hw;
+ u8 node;
+ unsigned int flags;
+ u8 num_vf_en; /* No of VF enabled */
+ bool vf_enabled[MAX_NUM_VFS_SUPPORTED];
+ void __iomem *reg_base; /* Register start address */
+ u8 num_sqs_en; /* Secondary qsets enabled */
+ u64 nicvf[MAX_NUM_VFS_SUPPORTED];
+ u8 vf_sqs[MAX_NUM_VFS_SUPPORTED][MAX_SQS_PER_VF];
+ u8 pqs_vf[MAX_NUM_VFS_SUPPORTED];
+ bool sqs_used[MAX_NUM_VFS_SUPPORTED];
+ struct pkind_cfg pkind;
+#define NIC_SET_VF_LMAC_MAP(bgx, lmac) (((bgx & 0xF) << 4) | (lmac & 0xF))
+#define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF)
+#define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF)
+ u8 *vf_lmac_map;
+ struct delayed_work dwork;
+ struct workqueue_struct *check_link;
+ u8 *link;
+ u8 *duplex;
+ u32 *speed;
+ u16 cpi_base[MAX_NUM_VFS_SUPPORTED];
+ u16 rssi_base[MAX_NUM_VFS_SUPPORTED];
+ bool mbx_lock[MAX_NUM_VFS_SUPPORTED];
+
+ /* MSI-X */
+ u8 num_vec;
+ bool irq_allocated[NIC_PF_MSIX_VECTORS];
+ char irq_name[NIC_PF_MSIX_VECTORS][20];
+};
+
+/* Supported devices */
+static const struct pci_device_id nic_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_PF) },
+ { 0, } /* end of table */
+};
+
+MODULE_AUTHOR("Sunil Goutham");
+MODULE_DESCRIPTION("Cavium Thunder NIC Physical Function Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, nic_id_table);
+
+/* The Cavium ThunderX network controller can *only* be found in SoCs
+ * containing the ThunderX ARM64 CPU implementation. All accesses to the device
+ * registers on this platform are implicitly strongly ordered with respect
+ * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
+ * with no memory barriers in this driver. The readq()/writeq() functions add
+ * explicit ordering operation which in this case are redundant, and only
+ * add overhead.
+ */
+
+/* Register read/write APIs */
+static void nic_reg_write(struct nicpf *nic, u64 offset, u64 val)
+{
+ writeq_relaxed(val, nic->reg_base + offset);
+}
+
+static u64 nic_reg_read(struct nicpf *nic, u64 offset)
+{
+ return readq_relaxed(nic->reg_base + offset);
+}
+
+/* PF -> VF mailbox communication APIs */
+static void nic_enable_mbx_intr(struct nicpf *nic)
+{
+ int vf_cnt = pci_sriov_get_totalvfs(nic->pdev);
+
+#define INTR_MASK(vfs) ((vfs < 64) ? (BIT_ULL(vfs) - 1) : (~0ull))
+
+ /* Clear it, to avoid spurious interrupts (if any) */
+ nic_reg_write(nic, NIC_PF_MAILBOX_INT, INTR_MASK(vf_cnt));
+
+ /* Enable mailbox interrupt for all VFs */
+ nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, INTR_MASK(vf_cnt));
+ /* One mailbox intr enable reg per 64 VFs */
+ if (vf_cnt > 64) {
+ nic_reg_write(nic, NIC_PF_MAILBOX_INT + sizeof(u64),
+ INTR_MASK(vf_cnt - 64));
+ nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64),
+ INTR_MASK(vf_cnt - 64));
+ }
+}
+
+static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg)
+{
+ nic_reg_write(nic, NIC_PF_MAILBOX_INT + (mbx_reg << 3), BIT_ULL(vf));
+}
+
+static u64 nic_get_mbx_addr(int vf)
+{
+ return NIC_PF_VF_0_127_MAILBOX_0_1 + (vf << NIC_VF_NUM_SHIFT);
+}
+
+/* Send a mailbox message to VF
+ * @vf: vf to which this message to be sent
+ * @mbx: Message to be sent
+ */
+static void nic_send_msg_to_vf(struct nicpf *nic, int vf, union nic_mbx *mbx)
+{
+ void __iomem *mbx_addr = nic->reg_base + nic_get_mbx_addr(vf);
+ u64 *msg = (u64 *)mbx;
+
+ /* In first revision HW, mbox interrupt is triggerred
+ * when PF writes to MBOX(1), in next revisions when
+ * PF writes to MBOX(0)
+ */
+ if (pass1_silicon(nic->pdev)) {
+ /* see the comment for nic_reg_write()/nic_reg_read()
+ * functions above
+ */
+ writeq_relaxed(msg[0], mbx_addr);
+ writeq_relaxed(msg[1], mbx_addr + 8);
+ } else {
+ writeq_relaxed(msg[1], mbx_addr + 8);
+ writeq_relaxed(msg[0], mbx_addr);
+ }
+}
+
+/* Responds to VF's READY message with VF's
+ * ID, node, MAC address e.t.c
+ * @vf: VF which sent READY message
+ */
+static void nic_mbx_send_ready(struct nicpf *nic, int vf)
+{
+ union nic_mbx mbx = {};
+ int bgx_idx, lmac;
+ const char *mac;
+
+ mbx.nic_cfg.msg = NIC_MBOX_MSG_READY;
+ mbx.nic_cfg.vf_id = vf;
+
+ mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE;
+
+ if (vf < nic->num_vf_en) {
+ bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+ mac = bgx_get_lmac_mac(nic->node, bgx_idx, lmac);
+ if (mac)
+ ether_addr_copy((u8 *)&mbx.nic_cfg.mac_addr, mac);
+ }
+ mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false;
+ mbx.nic_cfg.node_id = nic->node;
+
+ mbx.nic_cfg.loopback_supported = vf < nic->num_vf_en;
+
+ nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* ACKs VF's mailbox message
+ * @vf: VF to which ACK to be sent
+ */
+static void nic_mbx_send_ack(struct nicpf *nic, int vf)
+{
+ union nic_mbx mbx = {};
+
+ mbx.msg.msg = NIC_MBOX_MSG_ACK;
+ nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* NACKs VF's mailbox message that PF is not able to
+ * complete the action
+ * @vf: VF to which ACK to be sent
+ */
+static void nic_mbx_send_nack(struct nicpf *nic, int vf)
+{
+ union nic_mbx mbx = {};
+
+ mbx.msg.msg = NIC_MBOX_MSG_NACK;
+ nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* Flush all in flight receive packets to memory and
+ * bring down an active RQ
+ */
+static int nic_rcv_queue_sw_sync(struct nicpf *nic)
+{
+ u16 timeout = ~0x00;
+
+ nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x01);
+ /* Wait till sync cycle is finished */
+ while (timeout) {
+ if (nic_reg_read(nic, NIC_PF_SW_SYNC_RX_DONE) & 0x1)
+ break;
+ timeout--;
+ }
+ nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x00);
+ if (!timeout) {
+ dev_err(&nic->pdev->dev, "Receive queue software sync failed");
+ return 1;
+ }
+ return 0;
+}
+
+/* Get BGX Rx/Tx stats and respond to VF's request */
+static void nic_get_bgx_stats(struct nicpf *nic, struct bgx_stats_msg *bgx)
+{
+ int bgx_idx, lmac;
+ union nic_mbx mbx = {};
+
+ bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]);
+
+ mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
+ mbx.bgx_stats.vf_id = bgx->vf_id;
+ mbx.bgx_stats.rx = bgx->rx;
+ mbx.bgx_stats.idx = bgx->idx;
+ if (bgx->rx)
+ mbx.bgx_stats.stats = bgx_get_rx_stats(nic->node, bgx_idx,
+ lmac, bgx->idx);
+ else
+ mbx.bgx_stats.stats = bgx_get_tx_stats(nic->node, bgx_idx,
+ lmac, bgx->idx);
+ nic_send_msg_to_vf(nic, bgx->vf_id, &mbx);
+}
+
+/* Update hardware min/max frame size */
+static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf)
+{
+ int bgx, lmac, lmac_cnt;
+ u64 lmac_credits;
+
+ if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS))
+ return 1;
+
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac += bgx * MAX_LMAC_PER_BGX;
+
+ new_frs += VLAN_ETH_HLEN + ETH_FCS_LEN + 4;
+
+ /* Update corresponding LMAC credits */
+ lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
+ lmac_credits = nic_reg_read(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8));
+ lmac_credits &= ~(0xFFFFFULL << 12);
+ lmac_credits |= (((((48 * 1024) / lmac_cnt) - new_frs) / 16) << 12);
+ nic_reg_write(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8), lmac_credits);
+
+ /* Enforce MTU in HW
+ * This config is supported only from 88xx pass 2.0 onwards.
+ */
+ if (!pass1_silicon(nic->pdev))
+ nic_reg_write(nic,
+ NIC_PF_LMAC_0_7_CFG2 + (lmac * 8), new_frs);
+ return 0;
+}
+
+/* Set minimum transmit packet size */
+static void nic_set_tx_pkt_pad(struct nicpf *nic, int size)
+{
+ int lmac, max_lmac;
+ u16 sdevid;
+ u64 lmac_cfg;
+
+ /* There is a issue in HW where-in while sending GSO sized
+ * pkts as part of TSO, if pkt len falls below this size
+ * NIC will zero PAD packet and also updates IP total length.
+ * Hence set this value to lessthan min pkt size of MAC+IP+TCP
+ * headers, BGX will do the padding to transmit 64 byte pkt.
+ */
+ if (size > 52)
+ size = 52;
+
+ pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+ /* 81xx's RGX has only one LMAC */
+ if (sdevid == PCI_SUBSYS_DEVID_81XX_NIC_PF)
+ max_lmac = ((nic->hw->bgx_cnt - 1) * MAX_LMAC_PER_BGX) + 1;
+ else
+ max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX;
+
+ for (lmac = 0; lmac < max_lmac; lmac++) {
+ lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3));
+ lmac_cfg &= ~(0xF << 2);
+ lmac_cfg |= ((size / 4) << 2);
+ nic_reg_write(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3), lmac_cfg);
+ }
+}
+
+/* Function to check number of LMACs present and set VF::LMAC mapping.
+ * Mapping will be used while initializing channels.
+ */
+static void nic_set_lmac_vf_mapping(struct nicpf *nic)
+{
+ unsigned bgx_map = bgx_get_map(nic->node);
+ int bgx, next_bgx_lmac = 0;
+ int lmac, lmac_cnt = 0;
+ u64 lmac_credit;
+
+ nic->num_vf_en = 0;
+
+ for (bgx = 0; bgx < nic->hw->bgx_cnt; bgx++) {
+ if (!(bgx_map & (1 << bgx)))
+ continue;
+ lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
+ for (lmac = 0; lmac < lmac_cnt; lmac++)
+ nic->vf_lmac_map[next_bgx_lmac++] =
+ NIC_SET_VF_LMAC_MAP(bgx, lmac);
+ nic->num_vf_en += lmac_cnt;
+
+ /* Program LMAC credits */
+ lmac_credit = (1ull << 1); /* channel credit enable */
+ lmac_credit |= (0x1ff << 2); /* Max outstanding pkt count */
+ /* 48KB BGX Tx buffer size, each unit is of size 16bytes */
+ lmac_credit |= (((((48 * 1024) / lmac_cnt) -
+ NIC_HW_MAX_FRS) / 16) << 12);
+ lmac = bgx * MAX_LMAC_PER_BGX;
+ for (; lmac < lmac_cnt + (bgx * MAX_LMAC_PER_BGX); lmac++)
+ nic_reg_write(nic,
+ NIC_PF_LMAC_0_7_CREDIT + (lmac * 8),
+ lmac_credit);
+
+ /* On CN81XX there are only 8 VFs but max possible no of
+ * interfaces are 9.
+ */
+ if (nic->num_vf_en >= pci_sriov_get_totalvfs(nic->pdev)) {
+ nic->num_vf_en = pci_sriov_get_totalvfs(nic->pdev);
+ break;
+ }
+ }
+}
+
+static void nic_get_hw_info(struct nicpf *nic)
+{
+ u16 sdevid;
+ struct hw_info *hw = nic->hw;
+
+ pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+
+ switch (sdevid) {
+ case PCI_SUBSYS_DEVID_88XX_NIC_PF:
+ hw->bgx_cnt = MAX_BGX_PER_CN88XX;
+ hw->chans_per_lmac = 16;
+ hw->chans_per_bgx = 128;
+ hw->cpi_cnt = 2048;
+ hw->rssi_cnt = 4096;
+ hw->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
+ hw->tl3_cnt = 256;
+ hw->tl2_cnt = 64;
+ hw->tl1_cnt = 2;
+ hw->tl1_per_bgx = true;
+ break;
+ case PCI_SUBSYS_DEVID_81XX_NIC_PF:
+ hw->bgx_cnt = MAX_BGX_PER_CN81XX;
+ hw->chans_per_lmac = 8;
+ hw->chans_per_bgx = 32;
+ hw->chans_per_rgx = 8;
+ hw->chans_per_lbk = 24;
+ hw->cpi_cnt = 512;
+ hw->rssi_cnt = 256;
+ hw->rss_ind_tbl_size = 32; /* Max RSSI / Max interfaces */
+ hw->tl3_cnt = 64;
+ hw->tl2_cnt = 16;
+ hw->tl1_cnt = 10;
+ hw->tl1_per_bgx = false;
+ break;
+ case PCI_SUBSYS_DEVID_83XX_NIC_PF:
+ hw->bgx_cnt = MAX_BGX_PER_CN83XX;
+ hw->chans_per_lmac = 8;
+ hw->chans_per_bgx = 32;
+ hw->chans_per_lbk = 64;
+ hw->cpi_cnt = 2048;
+ hw->rssi_cnt = 1024;
+ hw->rss_ind_tbl_size = 64; /* Max RSSI / Max interfaces */
+ hw->tl3_cnt = 256;
+ hw->tl2_cnt = 64;
+ hw->tl1_cnt = 18;
+ hw->tl1_per_bgx = false;
+ break;
+ }
+ hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev);
+}
+
+#define BGX0_BLOCK 8
+#define BGX1_BLOCK 9
+
+static void nic_init_hw(struct nicpf *nic)
+{
+ int i;
+ u64 cqm_cfg;
+
+ /* Enable NIC HW block */
+ nic_reg_write(nic, NIC_PF_CFG, 0x3);
+
+ /* Enable backpressure */
+ nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03);
+
+ /* TNS and TNS bypass modes are present only on 88xx
+ * Also offset of this CSR has changed in 81xx and 83xx.
+ */
+ if (nic->pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF) {
+ /* Disable TNS mode on both interfaces */
+ nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
+ (NIC_TNS_BYPASS_MODE << 7) |
+ BGX0_BLOCK | (1ULL << 16));
+ nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
+ (NIC_TNS_BYPASS_MODE << 7) |
+ BGX1_BLOCK | (1ULL << 16));
+ } else {
+ /* Configure timestamp generation timeout to 10us */
+ for (i = 0; i < nic->hw->bgx_cnt; i++)
+ nic_reg_write(nic, NIC_PF_INTFX_SEND_CFG | (i << 3),
+ (1ULL << 16));
+ }
+
+ nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG,
+ (1ULL << 63) | BGX0_BLOCK);
+ nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8),
+ (1ULL << 63) | BGX1_BLOCK);
+
+ /* PKIND configuration */
+ nic->pkind.minlen = 0;
+ nic->pkind.maxlen = NIC_HW_MAX_FRS + VLAN_ETH_HLEN + ETH_FCS_LEN + 4;
+ nic->pkind.lenerr_en = 1;
+ nic->pkind.rx_hdr = 0;
+ nic->pkind.hdr_sl = 0;
+
+ for (i = 0; i < NIC_MAX_PKIND; i++)
+ nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (i << 3),
+ *(u64 *)&nic->pkind);
+
+ nic_set_tx_pkt_pad(nic, NIC_HW_MIN_FRS);
+
+ /* Timer config */
+ nic_reg_write(nic, NIC_PF_INTR_TIMER_CFG, NICPF_CLK_PER_INT_TICK);
+
+ /* Enable VLAN ethertype matching and stripping */
+ nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7,
+ (2 << 19) | (ETYPE_ALG_VLAN_STRIP << 16) | ETH_P_8021Q);
+
+ /* Check if HW expected value is higher (could be in future chips) */
+ cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG);
+ if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL)
+ nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL);
+}
+
+/* Channel parse index configuration */
+static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
+{
+ struct hw_info *hw = nic->hw;
+ u32 vnic, bgx, lmac, chan;
+ u32 padd, cpi_count = 0;
+ u64 cpi_base, cpi, rssi_base, rssi;
+ u8 qset, rq_idx = 0;
+
+ vnic = cfg->vf_id;
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
+
+ chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
+ cpi_base = vnic * NIC_MAX_CPI_PER_LMAC;
+ rssi_base = vnic * hw->rss_ind_tbl_size;
+
+ /* Rx channel configuration */
+ nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3),
+ (1ull << 63) | (vnic << 0));
+ nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_CFG | (chan << 3),
+ ((u64)cfg->cpi_alg << 62) | (cpi_base << 48));
+
+ if (cfg->cpi_alg == CPI_ALG_NONE)
+ cpi_count = 1;
+ else if (cfg->cpi_alg == CPI_ALG_VLAN) /* 3 bits of PCP */
+ cpi_count = 8;
+ else if (cfg->cpi_alg == CPI_ALG_VLAN16) /* 3 bits PCP + DEI */
+ cpi_count = 16;
+ else if (cfg->cpi_alg == CPI_ALG_DIFF) /* 6bits DSCP */
+ cpi_count = NIC_MAX_CPI_PER_LMAC;
+
+ /* RSS Qset, Qidx mapping */
+ qset = cfg->vf_id;
+ rssi = rssi_base;
+ for (; rssi < (rssi_base + cfg->rq_cnt); rssi++) {
+ nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
+ (qset << 3) | rq_idx);
+ rq_idx++;
+ }
+
+ rssi = 0;
+ cpi = cpi_base;
+ for (; cpi < (cpi_base + cpi_count); cpi++) {
+ /* Determine port to channel adder */
+ if (cfg->cpi_alg != CPI_ALG_DIFF)
+ padd = cpi % cpi_count;
+ else
+ padd = cpi % 8; /* 3 bits CS out of 6bits DSCP */
+
+ /* Leave RSS_SIZE as '0' to disable RSS */
+ if (pass1_silicon(nic->pdev)) {
+ nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3),
+ (vnic << 24) | (padd << 16) |
+ (rssi_base + rssi));
+ } else {
+ /* Set MPI_ALG to '0' to disable MCAM parsing */
+ nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3),
+ (padd << 16));
+ /* MPI index is same as CPI if MPI_ALG is not enabled */
+ nic_reg_write(nic, NIC_PF_MPI_0_2047_CFG | (cpi << 3),
+ (vnic << 24) | (rssi_base + rssi));
+ }
+
+ if ((rssi + 1) >= cfg->rq_cnt)
+ continue;
+
+ if (cfg->cpi_alg == CPI_ALG_VLAN)
+ rssi++;
+ else if (cfg->cpi_alg == CPI_ALG_VLAN16)
+ rssi = ((cpi - cpi_base) & 0xe) >> 1;
+ else if (cfg->cpi_alg == CPI_ALG_DIFF)
+ rssi = ((cpi - cpi_base) & 0x38) >> 3;
+ }
+ nic->cpi_base[cfg->vf_id] = cpi_base;
+ nic->rssi_base[cfg->vf_id] = rssi_base;
+}
+
+/* Responsds to VF with its RSS indirection table size */
+static void nic_send_rss_size(struct nicpf *nic, int vf)
+{
+ union nic_mbx mbx = {};
+
+ mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
+ mbx.rss_size.ind_tbl_size = nic->hw->rss_ind_tbl_size;
+ nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* Receive side scaling configuration
+ * configure:
+ * - RSS index
+ * - indir table i.e hash::RQ mapping
+ * - no of hash bits to consider
+ */
+static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
+{
+ u8 qset, idx = 0;
+ u64 cpi_cfg, cpi_base, rssi_base, rssi;
+ u64 idx_addr;
+
+ rssi_base = nic->rssi_base[cfg->vf_id] + cfg->tbl_offset;
+
+ rssi = rssi_base;
+
+ for (; rssi < (rssi_base + cfg->tbl_len); rssi++) {
+ u8 svf = cfg->ind_tbl[idx] >> 3;
+
+ if (svf)
+ qset = nic->vf_sqs[cfg->vf_id][svf - 1];
+ else
+ qset = cfg->vf_id;
+ nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
+ (qset << 3) | (cfg->ind_tbl[idx] & 0x7));
+ idx++;
+ }
+
+ cpi_base = nic->cpi_base[cfg->vf_id];
+ if (pass1_silicon(nic->pdev))
+ idx_addr = NIC_PF_CPI_0_2047_CFG;
+ else
+ idx_addr = NIC_PF_MPI_0_2047_CFG;
+ cpi_cfg = nic_reg_read(nic, idx_addr | (cpi_base << 3));
+ cpi_cfg &= ~(0xFULL << 20);
+ cpi_cfg |= (cfg->hash_bits << 20);
+ nic_reg_write(nic, idx_addr | (cpi_base << 3), cpi_cfg);
+}
+
+/* 4 level transmit side scheduler configutation
+ * for TNS bypass mode
+ *
+ * Sample configuration for SQ0 on 88xx
+ * VNIC0-SQ0 -> TL4(0) -> TL3[0] -> TL2[0] -> TL1[0] -> BGX0
+ * VNIC1-SQ0 -> TL4(8) -> TL3[2] -> TL2[0] -> TL1[0] -> BGX0
+ * VNIC2-SQ0 -> TL4(16) -> TL3[4] -> TL2[1] -> TL1[0] -> BGX0
+ * VNIC3-SQ0 -> TL4(24) -> TL3[6] -> TL2[1] -> TL1[0] -> BGX0
+ * VNIC4-SQ0 -> TL4(512) -> TL3[128] -> TL2[32] -> TL1[1] -> BGX1
+ * VNIC5-SQ0 -> TL4(520) -> TL3[130] -> TL2[32] -> TL1[1] -> BGX1
+ * VNIC6-SQ0 -> TL4(528) -> TL3[132] -> TL2[33] -> TL1[1] -> BGX1
+ * VNIC7-SQ0 -> TL4(536) -> TL3[134] -> TL2[33] -> TL1[1] -> BGX1
+ */
+static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
+ struct sq_cfg_msg *sq)
+{
+ struct hw_info *hw = nic->hw;
+ u32 bgx, lmac, chan;
+ u32 tl2, tl3, tl4;
+ u32 rr_quantum;
+ u8 sq_idx = sq->sq_num;
+ u8 pqs_vnic;
+ int svf;
+
+ if (sq->sqs_mode)
+ pqs_vnic = nic->pqs_vf[vnic];
+ else
+ pqs_vnic = vnic;
+
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]);
+
+ /* 24 bytes for FCS, IPG and preamble */
+ rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
+
+ /* For 88xx 0-511 TL4 transmits via BGX0 and
+ * 512-1023 TL4s transmit via BGX1.
+ */
+ if (hw->tl1_per_bgx) {
+ tl4 = bgx * (hw->tl4_cnt / hw->bgx_cnt);
+ if (!sq->sqs_mode) {
+ tl4 += (lmac * MAX_QUEUES_PER_QSET);
+ } else {
+ for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
+ if (nic->vf_sqs[pqs_vnic][svf] == vnic)
+ break;
+ }
+ tl4 += (MAX_LMAC_PER_BGX * MAX_QUEUES_PER_QSET);
+ tl4 += (lmac * MAX_QUEUES_PER_QSET * MAX_SQS_PER_VF);
+ tl4 += (svf * MAX_QUEUES_PER_QSET);
+ }
+ } else {
+ tl4 = (vnic * MAX_QUEUES_PER_QSET);
+ }
+ tl4 += sq_idx;
+
+ tl3 = tl4 / (hw->tl4_cnt / hw->tl3_cnt);
+ nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
+ ((u64)vnic << NIC_QS_ID_SHIFT) |
+ ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4);
+ nic_reg_write(nic, NIC_PF_TL4_0_1023_CFG | (tl4 << 3),
+ ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum);
+
+ nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum);
+
+ /* On 88xx 0-127 channels are for BGX0 and
+ * 127-255 channels for BGX1.
+ *
+ * On 81xx/83xx TL3_CHAN reg should be configured with channel
+ * within LMAC i.e 0-7 and not the actual channel number like on 88xx
+ */
+ chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
+ if (hw->tl1_per_bgx)
+ nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
+ else
+ nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), 0);
+
+ /* Enable backpressure on the channel */
+ nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1);
+
+ tl2 = tl3 >> 2;
+ nic_reg_write(nic, NIC_PF_TL3A_0_63_CFG | (tl2 << 3), tl2);
+ nic_reg_write(nic, NIC_PF_TL2_0_63_CFG | (tl2 << 3), rr_quantum);
+ /* No priorities as of now */
+ nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00);
+
+ /* Unlike 88xx where TL2s 0-31 transmits to TL1 '0' and rest to TL1 '1'
+ * on 81xx/83xx TL2 needs to be configured to transmit to one of the
+ * possible LMACs.
+ *
+ * This register doesn't exist on 88xx.
+ */
+ if (!hw->tl1_per_bgx)
+ nic_reg_write(nic, NIC_PF_TL2_LMAC | (tl2 << 3),
+ lmac + (bgx * MAX_LMAC_PER_BGX));
+}
+
+/* Send primary nicvf pointer to secondary QS's VF */
+static void nic_send_pnicvf(struct nicpf *nic, int sqs)
+{
+ union nic_mbx mbx = {};
+
+ mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
+ mbx.nicvf.nicvf = nic->nicvf[nic->pqs_vf[sqs]];
+ nic_send_msg_to_vf(nic, sqs, &mbx);
+}
+
+/* Send SQS's nicvf pointer to primary QS's VF */
+static void nic_send_snicvf(struct nicpf *nic, struct nicvf_ptr *nicvf)
+{
+ union nic_mbx mbx = {};
+ int sqs_id = nic->vf_sqs[nicvf->vf_id][nicvf->sqs_id];
+
+ mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
+ mbx.nicvf.sqs_id = nicvf->sqs_id;
+ mbx.nicvf.nicvf = nic->nicvf[sqs_id];
+ nic_send_msg_to_vf(nic, nicvf->vf_id, &mbx);
+}
+
+/* Find next available Qset that can be assigned as a
+ * secondary Qset to a VF.
+ */
+static int nic_nxt_avail_sqs(struct nicpf *nic)
+{
+ int sqs;
+
+ for (sqs = 0; sqs < nic->num_sqs_en; sqs++) {
+ if (!nic->sqs_used[sqs])
+ nic->sqs_used[sqs] = true;
+ else
+ continue;
+ return sqs + nic->num_vf_en;
+ }
+ return -1;
+}
+
+/* Allocate additional Qsets for requested VF */
+static void nic_alloc_sqs(struct nicpf *nic, struct sqs_alloc *sqs)
+{
+ union nic_mbx mbx = {};
+ int idx, alloc_qs = 0;
+ int sqs_id;
+
+ if (!nic->num_sqs_en)
+ goto send_mbox;
+
+ for (idx = 0; idx < sqs->qs_count; idx++) {
+ sqs_id = nic_nxt_avail_sqs(nic);
+ if (sqs_id < 0)
+ break;
+ nic->vf_sqs[sqs->vf_id][idx] = sqs_id;
+ nic->pqs_vf[sqs_id] = sqs->vf_id;
+ alloc_qs++;
+ }
+
+send_mbox:
+ mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
+ mbx.sqs_alloc.vf_id = sqs->vf_id;
+ mbx.sqs_alloc.qs_count = alloc_qs;
+ nic_send_msg_to_vf(nic, sqs->vf_id, &mbx);
+}
+
+static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk)
+{
+ int bgx_idx, lmac_idx;
+
+ if (lbk->vf_id >= nic->num_vf_en)
+ return -1;
+
+ bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
+ lmac_idx = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
+
+ bgx_lmac_internal_loopback(nic->node, bgx_idx, lmac_idx, lbk->enable);
+
+ /* Enable moving average calculation.
+ * Keep the LVL/AVG delay to HW enforced minimum so that, not too many
+ * packets sneek in between average calculations.
+ */
+ nic_reg_write(nic, NIC_PF_CQ_AVG_CFG,
+ (BIT_ULL(20) | 0x2ull << 14 | 0x1));
+ nic_reg_write(nic, NIC_PF_RRM_AVG_CFG,
+ (BIT_ULL(20) | 0x3ull << 14 | 0x1));
+
+ return 0;
+}
+
+/* Reset statistics counters */
+static int nic_reset_stat_counters(struct nicpf *nic,
+ int vf, struct reset_stat_cfg *cfg)
+{
+ int i, stat, qnum;
+ u64 reg_addr;
+
+ for (i = 0; i < RX_STATS_ENUM_LAST; i++) {
+ if (cfg->rx_stat_mask & BIT(i)) {
+ reg_addr = NIC_PF_VNIC_0_127_RX_STAT_0_13 |
+ (vf << NIC_QS_ID_SHIFT) |
+ (i << 3);
+ nic_reg_write(nic, reg_addr, 0);
+ }
+ }
+
+ for (i = 0; i < TX_STATS_ENUM_LAST; i++) {
+ if (cfg->tx_stat_mask & BIT(i)) {
+ reg_addr = NIC_PF_VNIC_0_127_TX_STAT_0_4 |
+ (vf << NIC_QS_ID_SHIFT) |
+ (i << 3);
+ nic_reg_write(nic, reg_addr, 0);
+ }
+ }
+
+ for (i = 0; i <= 15; i++) {
+ qnum = i >> 1;
+ stat = i & 1 ? 1 : 0;
+ reg_addr = (vf << NIC_QS_ID_SHIFT) |
+ (qnum << NIC_Q_NUM_SHIFT) | (stat << 3);
+ if (cfg->rq_stat_mask & BIT(i)) {
+ reg_addr |= NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1;
+ nic_reg_write(nic, reg_addr, 0);
+ }
+ if (cfg->sq_stat_mask & BIT(i)) {
+ reg_addr |= NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1;
+ nic_reg_write(nic, reg_addr, 0);
+ }
+ }
+
+ return 0;
+}
+
+static void nic_enable_tunnel_parsing(struct nicpf *nic, int vf)
+{
+ u64 prot_def = (IPV6_PROT << 32) | (IPV4_PROT << 16) | ET_PROT;
+ u64 vxlan_prot_def = (IPV6_PROT_DEF << 32) |
+ (IPV4_PROT_DEF) << 16 | ET_PROT_DEF;
+
+ /* Configure tunnel parsing parameters */
+ nic_reg_write(nic, NIC_PF_RX_GENEVE_DEF,
+ (1ULL << 63 | UDP_GENEVE_PORT_NUM));
+ nic_reg_write(nic, NIC_PF_RX_GENEVE_PROT_DEF,
+ ((7ULL << 61) | prot_def));
+ nic_reg_write(nic, NIC_PF_RX_NVGRE_PROT_DEF,
+ ((7ULL << 61) | prot_def));
+ nic_reg_write(nic, NIC_PF_RX_VXLAN_DEF_0_1,
+ ((1ULL << 63) | UDP_VXLAN_PORT_NUM));
+ nic_reg_write(nic, NIC_PF_RX_VXLAN_PROT_DEF,
+ ((0xfULL << 60) | vxlan_prot_def));
+}
+
+static void nic_enable_vf(struct nicpf *nic, int vf, bool enable)
+{
+ int bgx, lmac;
+
+ nic->vf_enabled[vf] = enable;
+
+ if (vf >= nic->num_vf_en)
+ return;
+
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+ bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, enable);
+}
+
+static void nic_pause_frame(struct nicpf *nic, int vf, struct pfc *cfg)
+{
+ int bgx, lmac;
+ struct pfc pfc;
+ union nic_mbx mbx = {};
+
+ if (vf >= nic->num_vf_en)
+ return;
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+ if (cfg->get) {
+ bgx_lmac_get_pfc(nic->node, bgx, lmac, &pfc);
+ mbx.pfc.msg = NIC_MBOX_MSG_PFC;
+ mbx.pfc.autoneg = pfc.autoneg;
+ mbx.pfc.fc_rx = pfc.fc_rx;
+ mbx.pfc.fc_tx = pfc.fc_tx;
+ nic_send_msg_to_vf(nic, vf, &mbx);
+ } else {
+ bgx_lmac_set_pfc(nic->node, bgx, lmac, cfg);
+ nic_mbx_send_ack(nic, vf);
+ }
+}
+
+/* Enable or disable HW timestamping by BGX for pkts received on a LMAC */
+static void nic_config_timestamp(struct nicpf *nic, int vf, struct set_ptp *ptp)
+{
+ struct pkind_cfg *pkind;
+ u8 lmac, bgx_idx;
+ u64 pkind_val, pkind_idx;
+
+ if (vf >= nic->num_vf_en)
+ return;
+
+ bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+ pkind_idx = lmac + bgx_idx * MAX_LMAC_PER_BGX;
+ pkind_val = nic_reg_read(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3));
+ pkind = (struct pkind_cfg *)&pkind_val;
+
+ if (ptp->enable && !pkind->hdr_sl) {
+ /* Skiplen to exclude 8byte timestamp while parsing pkt
+ * If not configured, will result in L2 errors.
+ */
+ pkind->hdr_sl = 4;
+ /* Adjust max packet length allowed */
+ pkind->maxlen += (pkind->hdr_sl * 2);
+ bgx_config_timestamping(nic->node, bgx_idx, lmac, true);
+ nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7 | (1 << 3),
+ (ETYPE_ALG_ENDPARSE << 16) | ETH_P_1588);
+ } else if (!ptp->enable && pkind->hdr_sl) {
+ pkind->maxlen -= (pkind->hdr_sl * 2);
+ pkind->hdr_sl = 0;
+ bgx_config_timestamping(nic->node, bgx_idx, lmac, false);
+ nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7 | (1 << 3),
+ (ETYPE_ALG_SKIP << 16) | ETH_P_8021Q);
+ }
+
+ nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3), pkind_val);
+}
+
+/* Interrupt handler to handle mailbox messages from VFs */
+static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
+{
+ union nic_mbx mbx = {};
+ u64 *mbx_data;
+ u64 mbx_addr;
+ u64 reg_addr;
+ u64 cfg;
+ int bgx, lmac;
+ int i;
+ int ret = 0;
+
+ nic->mbx_lock[vf] = true;
+
+ mbx_addr = nic_get_mbx_addr(vf);
+ mbx_data = (u64 *)&mbx;
+
+ for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
+ *mbx_data = nic_reg_read(nic, mbx_addr);
+ mbx_data++;
+ mbx_addr += sizeof(u64);
+ }
+
+ dev_dbg(&nic->pdev->dev, "%s: Mailbox msg 0x%02x from VF%d\n",
+ __func__, mbx.msg.msg, vf);
+ switch (mbx.msg.msg) {
+ case NIC_MBOX_MSG_READY:
+ nic_mbx_send_ready(nic, vf);
+ if (vf < nic->num_vf_en) {
+ nic->link[vf] = 0;
+ nic->duplex[vf] = 0;
+ nic->speed[vf] = 0;
+ }
+ goto unlock;
+ case NIC_MBOX_MSG_QS_CFG:
+ reg_addr = NIC_PF_QSET_0_127_CFG |
+ (mbx.qs.num << NIC_QS_ID_SHIFT);
+ cfg = mbx.qs.cfg;
+ /* Check if its a secondary Qset */
+ if (vf >= nic->num_vf_en) {
+ cfg = cfg & (~0x7FULL);
+ /* Assign this Qset to primary Qset's VF */
+ cfg |= nic->pqs_vf[vf];
+ }
+ nic_reg_write(nic, reg_addr, cfg);
+ break;
+ case NIC_MBOX_MSG_RQ_CFG:
+ reg_addr = NIC_PF_QSET_0_127_RQ_0_7_CFG |
+ (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
+ (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
+ nic_reg_write(nic, reg_addr, mbx.rq.cfg);
+ /* Enable CQE_RX2_S extension in CQE_RX descriptor.
+ * This gets appended by default on 81xx/83xx chips,
+ * for consistency enabling the same on 88xx pass2
+ * where this is introduced.
+ */
+ if (pass2_silicon(nic->pdev))
+ nic_reg_write(nic, NIC_PF_RX_CFG, 0x01);
+ if (!pass1_silicon(nic->pdev))
+ nic_enable_tunnel_parsing(nic, vf);
+ break;
+ case NIC_MBOX_MSG_RQ_BP_CFG:
+ reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG |
+ (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
+ (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
+ nic_reg_write(nic, reg_addr, mbx.rq.cfg);
+ break;
+ case NIC_MBOX_MSG_RQ_SW_SYNC:
+ ret = nic_rcv_queue_sw_sync(nic);
+ break;
+ case NIC_MBOX_MSG_RQ_DROP_CFG:
+ reg_addr = NIC_PF_QSET_0_127_RQ_0_7_DROP_CFG |
+ (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
+ (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
+ nic_reg_write(nic, reg_addr, mbx.rq.cfg);
+ break;
+ case NIC_MBOX_MSG_SQ_CFG:
+ reg_addr = NIC_PF_QSET_0_127_SQ_0_7_CFG |
+ (mbx.sq.qs_num << NIC_QS_ID_SHIFT) |
+ (mbx.sq.sq_num << NIC_Q_NUM_SHIFT);
+ nic_reg_write(nic, reg_addr, mbx.sq.cfg);
+ nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq);
+ break;
+ case NIC_MBOX_MSG_SET_MAC:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ lmac = mbx.mac.vf_id;
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
+ bgx_set_lmac_mac(nic->node, bgx, lmac, mbx.mac.mac_addr);
+ break;
+ case NIC_MBOX_MSG_SET_MAX_FRS:
+ ret = nic_update_hw_frs(nic, mbx.frs.max_frs,
+ mbx.frs.vf_id);
+ break;
+ case NIC_MBOX_MSG_CPI_CFG:
+ nic_config_cpi(nic, &mbx.cpi_cfg);
+ break;
+ case NIC_MBOX_MSG_RSS_SIZE:
+ nic_send_rss_size(nic, vf);
+ goto unlock;
+ case NIC_MBOX_MSG_RSS_CFG:
+ case NIC_MBOX_MSG_RSS_CFG_CONT:
+ nic_config_rss(nic, &mbx.rss_cfg);
+ break;
+ case NIC_MBOX_MSG_CFG_DONE:
+ /* Last message of VF config msg sequence */
+ nic_enable_vf(nic, vf, true);
+ break;
+ case NIC_MBOX_MSG_SHUTDOWN:
+ /* First msg in VF teardown sequence */
+ if (vf >= nic->num_vf_en)
+ nic->sqs_used[vf - nic->num_vf_en] = false;
+ nic->pqs_vf[vf] = 0;
+ nic_enable_vf(nic, vf, false);
+ break;
+ case NIC_MBOX_MSG_ALLOC_SQS:
+ nic_alloc_sqs(nic, &mbx.sqs_alloc);
+ goto unlock;
+ case NIC_MBOX_MSG_NICVF_PTR:
+ nic->nicvf[vf] = mbx.nicvf.nicvf;
+ break;
+ case NIC_MBOX_MSG_PNICVF_PTR:
+ nic_send_pnicvf(nic, vf);
+ goto unlock;
+ case NIC_MBOX_MSG_SNICVF_PTR:
+ nic_send_snicvf(nic, &mbx.nicvf);
+ goto unlock;
+ case NIC_MBOX_MSG_BGX_STATS:
+ nic_get_bgx_stats(nic, &mbx.bgx_stats);
+ goto unlock;
+ case NIC_MBOX_MSG_LOOPBACK:
+ ret = nic_config_loopback(nic, &mbx.lbk);
+ break;
+ case NIC_MBOX_MSG_RESET_STAT_COUNTER:
+ ret = nic_reset_stat_counters(nic, vf, &mbx.reset_stat);
+ break;
+ case NIC_MBOX_MSG_PFC:
+ nic_pause_frame(nic, vf, &mbx.pfc);
+ goto unlock;
+ case NIC_MBOX_MSG_PTP_CFG:
+ nic_config_timestamp(nic, vf, &mbx.ptp);
+ break;
+ case NIC_MBOX_MSG_RESET_XCAST:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ bgx_reset_xcast_mode(nic->node, bgx, lmac,
+ vf < NIC_VF_PER_MBX_REG ? vf :
+ vf - NIC_VF_PER_MBX_REG);
+ break;
+
+ case NIC_MBOX_MSG_ADD_MCAST:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ bgx_set_dmac_cam_filter(nic->node, bgx, lmac,
+ mbx.xcast.data.mac,
+ vf < NIC_VF_PER_MBX_REG ? vf :
+ vf - NIC_VF_PER_MBX_REG);
+ break;
+
+ case NIC_MBOX_MSG_SET_XCAST:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.data.mode);
+ break;
+ default:
+ dev_err(&nic->pdev->dev,
+ "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
+ break;
+ }
+
+ if (!ret) {
+ nic_mbx_send_ack(nic, vf);
+ } else if (mbx.msg.msg != NIC_MBOX_MSG_READY) {
+ dev_err(&nic->pdev->dev, "NACK for MBOX 0x%02x from VF %d\n",
+ mbx.msg.msg, vf);
+ nic_mbx_send_nack(nic, vf);
+ }
+unlock:
+ nic->mbx_lock[vf] = false;
+}
+
+static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
+{
+ struct nicpf *nic = (struct nicpf *)nic_irq;
+ int mbx;
+ u64 intr;
+ u8 vf;
+
+ if (irq == pci_irq_vector(nic->pdev, NIC_PF_INTR_ID_MBOX0))
+ mbx = 0;
+ else
+ mbx = 1;
+
+ intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3));
+ dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr);
+ for (vf = 0; vf < NIC_VF_PER_MBX_REG; vf++) {
+ if (intr & (1ULL << vf)) {
+ dev_dbg(&nic->pdev->dev, "Intr from VF %d\n",
+ vf + (mbx * NIC_VF_PER_MBX_REG));
+
+ nic_handle_mbx_intr(nic, vf +
+ (mbx * NIC_VF_PER_MBX_REG));
+ nic_clear_mbx_intr(nic, vf, mbx);
+ }
+ }
+ return IRQ_HANDLED;
+}
+
+static void nic_free_all_interrupts(struct nicpf *nic)
+{
+ int irq;
+
+ for (irq = 0; irq < nic->num_vec; irq++) {
+ if (nic->irq_allocated[irq])
+ free_irq(pci_irq_vector(nic->pdev, irq), nic);
+ nic->irq_allocated[irq] = false;
+ }
+}
+
+static int nic_register_interrupts(struct nicpf *nic)
+{
+ int i, ret;
+ nic->num_vec = pci_msix_vec_count(nic->pdev);
+
+ /* Enable MSI-X */
+ ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec,
+ PCI_IRQ_MSIX);
+ if (ret < 0) {
+ dev_err(&nic->pdev->dev,
+ "Request for #%d msix vectors failed, returned %d\n",
+ nic->num_vec, ret);
+ return ret;
+ }
+
+ /* Register mailbox interrupt handler */
+ for (i = NIC_PF_INTR_ID_MBOX0; i < nic->num_vec; i++) {
+ sprintf(nic->irq_name[i],
+ "NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0));
+
+ ret = request_irq(pci_irq_vector(nic->pdev, i),
+ nic_mbx_intr_handler, 0,
+ nic->irq_name[i], nic);
+ if (ret)
+ goto fail;
+
+ nic->irq_allocated[i] = true;
+ }
+
+ /* Enable mailbox interrupt */
+ nic_enable_mbx_intr(nic);
+ return 0;
+
+fail:
+ dev_err(&nic->pdev->dev, "Request irq failed\n");
+ nic_free_all_interrupts(nic);
+ pci_free_irq_vectors(nic->pdev);
+ nic->num_vec = 0;
+ return ret;
+}
+
+static void nic_unregister_interrupts(struct nicpf *nic)
+{
+ nic_free_all_interrupts(nic);
+ pci_free_irq_vectors(nic->pdev);
+ nic->num_vec = 0;
+}
+
+static int nic_num_sqs_en(struct nicpf *nic, int vf_en)
+{
+ int pos, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE;
+ u16 total_vf;
+
+ /* Secondary Qsets are needed only if CPU count is
+ * morethan MAX_QUEUES_PER_QSET.
+ */
+ if (num_online_cpus() <= MAX_QUEUES_PER_QSET)
+ return 0;
+
+ /* Check if its a multi-node environment */
+ if (nr_node_ids > 1)
+ sqs_per_vf = MAX_SQS_PER_VF;
+
+ pos = pci_find_ext_capability(nic->pdev, PCI_EXT_CAP_ID_SRIOV);
+ pci_read_config_word(nic->pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf);
+ return min(total_vf - vf_en, vf_en * sqs_per_vf);
+}
+
+static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
+{
+ int pos = 0;
+ int vf_en;
+ int err;
+ u16 total_vf_cnt;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ if (!pos) {
+ dev_err(&pdev->dev, "SRIOV capability is not found in PCIe config space\n");
+ return -ENODEV;
+ }
+
+ pci_read_config_word(pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf_cnt);
+ if (total_vf_cnt < nic->num_vf_en)
+ nic->num_vf_en = total_vf_cnt;
+
+ if (!total_vf_cnt)
+ return 0;
+
+ vf_en = nic->num_vf_en;
+ nic->num_sqs_en = nic_num_sqs_en(nic, nic->num_vf_en);
+ vf_en += nic->num_sqs_en;
+
+ err = pci_enable_sriov(pdev, vf_en);
+ if (err) {
+ dev_err(&pdev->dev, "SRIOV enable failed, num VF is %d\n",
+ vf_en);
+ nic->num_vf_en = 0;
+ return err;
+ }
+
+ dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n",
+ vf_en);
+
+ nic->flags |= NIC_SRIOV_ENABLED;
+ return 0;
+}
+
+/* Poll for BGX LMAC link status and update corresponding VF
+ * if there is a change, valid only if internal L2 switch
+ * is not present otherwise VF link is always treated as up
+ */
+static void nic_poll_for_link(struct work_struct *work)
+{
+ union nic_mbx mbx = {};
+ struct nicpf *nic;
+ struct bgx_link_status link;
+ u8 vf, bgx, lmac;
+
+ nic = container_of(work, struct nicpf, dwork.work);
+
+ mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
+
+ for (vf = 0; vf < nic->num_vf_en; vf++) {
+ /* Poll only if VF is UP */
+ if (!nic->vf_enabled[vf])
+ continue;
+
+ /* Get BGX, LMAC indices for the VF */
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ /* Get interface link status */
+ bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
+
+ /* Inform VF only if link status changed */
+ if (nic->link[vf] == link.link_up)
+ continue;
+
+ if (!nic->mbx_lock[vf]) {
+ nic->link[vf] = link.link_up;
+ nic->duplex[vf] = link.duplex;
+ nic->speed[vf] = link.speed;
+
+ /* Send a mbox message to VF with current link status */
+ mbx.link_status.link_up = link.link_up;
+ mbx.link_status.duplex = link.duplex;
+ mbx.link_status.speed = link.speed;
+ mbx.link_status.mac_type = link.mac_type;
+ nic_send_msg_to_vf(nic, vf, &mbx);
+ }
+ }
+ queue_delayed_work(nic->check_link, &nic->dwork, HZ * 2);
+}
+
+static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct device *dev = &pdev->dev;
+ struct nicpf *nic;
+ u8 max_lmac;
+ int err;
+
+ BUILD_BUG_ON(sizeof(union nic_mbx) > 16);
+
+ nic = devm_kzalloc(dev, sizeof(*nic), GFP_KERNEL);
+ if (!nic)
+ return -ENOMEM;
+
+ nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL);
+ if (!nic->hw)
+ return -ENOMEM;
+
+ pci_set_drvdata(pdev, nic);
+
+ nic->pdev = pdev;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(dev, "Failed to enable PCI device\n");
+ pci_set_drvdata(pdev, NULL);
+ return err;
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ dev_err(dev, "PCI request regions failed 0x%x\n", err);
+ goto err_disable_device;
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+ if (err) {
+ dev_err(dev, "Unable to get usable DMA configuration\n");
+ goto err_release_regions;
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
+ if (err) {
+ dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n");
+ goto err_release_regions;
+ }
+
+ /* MAP PF's configuration registers */
+ nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+ if (!nic->reg_base) {
+ dev_err(dev, "Cannot map config register space, aborting\n");
+ err = -ENOMEM;
+ goto err_release_regions;
+ }
+
+ nic->node = nic_get_node_id(pdev);
+
+ /* Get HW capability info */
+ nic_get_hw_info(nic);
+
+ /* Allocate memory for LMAC tracking elements */
+ err = -ENOMEM;
+ max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX;
+
+ nic->vf_lmac_map = devm_kmalloc_array(dev, max_lmac, sizeof(u8),
+ GFP_KERNEL);
+ if (!nic->vf_lmac_map)
+ goto err_release_regions;
+
+ nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
+ if (!nic->link)
+ goto err_release_regions;
+
+ nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
+ if (!nic->duplex)
+ goto err_release_regions;
+
+ nic->speed = devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL);
+ if (!nic->speed)
+ goto err_release_regions;
+
+ /* Initialize hardware */
+ nic_init_hw(nic);
+
+ nic_set_lmac_vf_mapping(nic);
+
+ /* Register interrupts */
+ err = nic_register_interrupts(nic);
+ if (err)
+ goto err_release_regions;
+
+ /* Configure SRIOV */
+ err = nic_sriov_init(pdev, nic);
+ if (err)
+ goto err_unregister_interrupts;
+
+ /* Register a physical link status poll fn() */
+ nic->check_link = alloc_workqueue("check_link_status",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
+ if (!nic->check_link) {
+ err = -ENOMEM;
+ goto err_disable_sriov;
+ }
+
+ INIT_DELAYED_WORK(&nic->dwork, nic_poll_for_link);
+ queue_delayed_work(nic->check_link, &nic->dwork, 0);
+
+ return 0;
+
+err_disable_sriov:
+ if (nic->flags & NIC_SRIOV_ENABLED)
+ pci_disable_sriov(pdev);
+err_unregister_interrupts:
+ nic_unregister_interrupts(nic);
+err_release_regions:
+ pci_release_regions(pdev);
+err_disable_device:
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ return err;
+}
+
+static void nic_remove(struct pci_dev *pdev)
+{
+ struct nicpf *nic = pci_get_drvdata(pdev);
+
+ if (!nic)
+ return;
+
+ if (nic->flags & NIC_SRIOV_ENABLED)
+ pci_disable_sriov(pdev);
+
+ if (nic->check_link) {
+ /* Destroy work Queue */
+ cancel_delayed_work_sync(&nic->dwork);
+ destroy_workqueue(nic->check_link);
+ }
+
+ nic_unregister_interrupts(nic);
+ pci_release_regions(pdev);
+
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+static struct pci_driver nic_driver = {
+ .name = DRV_NAME,
+ .id_table = nic_id_table,
+ .probe = nic_probe,
+ .remove = nic_remove,
+};
+
+static int __init nic_init_module(void)
+{
+ pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
+
+ return pci_register_driver(&nic_driver);
+}
+
+static void __exit nic_cleanup_module(void)
+{
+ pci_unregister_driver(&nic_driver);
+}
+
+module_init(nic_init_module);
+module_exit(nic_cleanup_module);
diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h
new file mode 100644
index 000000000..a16c48a1e
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef NIC_REG_H
+#define NIC_REG_H
+
+#define NIC_PF_REG_COUNT 29573
+#define NIC_VF_REG_COUNT 249
+
+/* Physical function register offsets */
+#define NIC_PF_CFG (0x0000)
+#define NIC_PF_STATUS (0x0010)
+#define NIC_PF_INTR_TIMER_CFG (0x0030)
+#define NIC_PF_BIST_STATUS (0x0040)
+#define NIC_PF_SOFT_RESET (0x0050)
+#define NIC_PF_TCP_TIMER (0x0060)
+#define NIC_PF_BP_CFG (0x0080)
+#define NIC_PF_RRM_CFG (0x0088)
+#define NIC_PF_CQM_CFG (0x00A0)
+#define NIC_PF_CNM_CF (0x00A8)
+#define NIC_PF_CNM_STATUS (0x00B0)
+#define NIC_PF_CQ_AVG_CFG (0x00C0)
+#define NIC_PF_RRM_AVG_CFG (0x00C8)
+#define NIC_PF_INTF_0_1_SEND_CFG (0x0200)
+#define NIC_PF_INTF_0_1_BP_CFG (0x0208)
+#define NIC_PF_INTF_0_1_BP_DIS_0_1 (0x0210)
+#define NIC_PF_INTF_0_1_BP_SW_0_1 (0x0220)
+#define NIC_PF_RBDR_BP_STATE_0_3 (0x0240)
+#define NIC_PF_MAILBOX_INT (0x0410)
+#define NIC_PF_MAILBOX_INT_W1S (0x0430)
+#define NIC_PF_MAILBOX_ENA_W1C (0x0450)
+#define NIC_PF_MAILBOX_ENA_W1S (0x0470)
+#define NIC_PF_RX_ETYPE_0_7 (0x0500)
+#define NIC_PF_RX_GENEVE_DEF (0x0580)
+#define UDP_GENEVE_PORT_NUM 0x17C1ULL
+#define NIC_PF_RX_GENEVE_PROT_DEF (0x0588)
+#define IPV6_PROT 0x86DDULL
+#define IPV4_PROT 0x800ULL
+#define ET_PROT 0x6558ULL
+#define NIC_PF_RX_NVGRE_PROT_DEF (0x0598)
+#define NIC_PF_RX_VXLAN_DEF_0_1 (0x05A0)
+#define UDP_VXLAN_PORT_NUM 0x12B5
+#define NIC_PF_RX_VXLAN_PROT_DEF (0x05B0)
+#define IPV6_PROT_DEF 0x2ULL
+#define IPV4_PROT_DEF 0x1ULL
+#define ET_PROT_DEF 0x3ULL
+#define NIC_PF_RX_CFG (0x05D0)
+#define NIC_PF_PKIND_0_15_CFG (0x0600)
+#define NIC_PF_ECC0_FLIP0 (0x1000)
+#define NIC_PF_ECC1_FLIP0 (0x1008)
+#define NIC_PF_ECC2_FLIP0 (0x1010)
+#define NIC_PF_ECC3_FLIP0 (0x1018)
+#define NIC_PF_ECC0_FLIP1 (0x1080)
+#define NIC_PF_ECC1_FLIP1 (0x1088)
+#define NIC_PF_ECC2_FLIP1 (0x1090)
+#define NIC_PF_ECC3_FLIP1 (0x1098)
+#define NIC_PF_ECC0_CDIS (0x1100)
+#define NIC_PF_ECC1_CDIS (0x1108)
+#define NIC_PF_ECC2_CDIS (0x1110)
+#define NIC_PF_ECC3_CDIS (0x1118)
+#define NIC_PF_BIST0_STATUS (0x1280)
+#define NIC_PF_BIST1_STATUS (0x1288)
+#define NIC_PF_BIST2_STATUS (0x1290)
+#define NIC_PF_BIST3_STATUS (0x1298)
+#define NIC_PF_ECC0_SBE_INT (0x2000)
+#define NIC_PF_ECC0_SBE_INT_W1S (0x2008)
+#define NIC_PF_ECC0_SBE_ENA_W1C (0x2010)
+#define NIC_PF_ECC0_SBE_ENA_W1S (0x2018)
+#define NIC_PF_ECC0_DBE_INT (0x2100)
+#define NIC_PF_ECC0_DBE_INT_W1S (0x2108)
+#define NIC_PF_ECC0_DBE_ENA_W1C (0x2110)
+#define NIC_PF_ECC0_DBE_ENA_W1S (0x2118)
+#define NIC_PF_ECC1_SBE_INT (0x2200)
+#define NIC_PF_ECC1_SBE_INT_W1S (0x2208)
+#define NIC_PF_ECC1_SBE_ENA_W1C (0x2210)
+#define NIC_PF_ECC1_SBE_ENA_W1S (0x2218)
+#define NIC_PF_ECC1_DBE_INT (0x2300)
+#define NIC_PF_ECC1_DBE_INT_W1S (0x2308)
+#define NIC_PF_ECC1_DBE_ENA_W1C (0x2310)
+#define NIC_PF_ECC1_DBE_ENA_W1S (0x2318)
+#define NIC_PF_ECC2_SBE_INT (0x2400)
+#define NIC_PF_ECC2_SBE_INT_W1S (0x2408)
+#define NIC_PF_ECC2_SBE_ENA_W1C (0x2410)
+#define NIC_PF_ECC2_SBE_ENA_W1S (0x2418)
+#define NIC_PF_ECC2_DBE_INT (0x2500)
+#define NIC_PF_ECC2_DBE_INT_W1S (0x2508)
+#define NIC_PF_ECC2_DBE_ENA_W1C (0x2510)
+#define NIC_PF_ECC2_DBE_ENA_W1S (0x2518)
+#define NIC_PF_ECC3_SBE_INT (0x2600)
+#define NIC_PF_ECC3_SBE_INT_W1S (0x2608)
+#define NIC_PF_ECC3_SBE_ENA_W1C (0x2610)
+#define NIC_PF_ECC3_SBE_ENA_W1S (0x2618)
+#define NIC_PF_ECC3_DBE_INT (0x2700)
+#define NIC_PF_ECC3_DBE_INT_W1S (0x2708)
+#define NIC_PF_ECC3_DBE_ENA_W1C (0x2710)
+#define NIC_PF_ECC3_DBE_ENA_W1S (0x2718)
+#define NIC_PF_INTFX_SEND_CFG (0x4000)
+#define NIC_PF_MCAM_0_191_ENA (0x100000)
+#define NIC_PF_MCAM_0_191_M_0_5_DATA (0x110000)
+#define NIC_PF_MCAM_CTRL (0x120000)
+#define NIC_PF_CPI_0_2047_CFG (0x200000)
+#define NIC_PF_MPI_0_2047_CFG (0x210000)
+#define NIC_PF_RSSI_0_4097_RQ (0x220000)
+#define NIC_PF_LMAC_0_7_CFG (0x240000)
+#define NIC_PF_LMAC_0_7_CFG2 (0x240100)
+#define NIC_PF_LMAC_0_7_SW_XOFF (0x242000)
+#define NIC_PF_LMAC_0_7_CREDIT (0x244000)
+#define NIC_PF_CHAN_0_255_TX_CFG (0x400000)
+#define NIC_PF_CHAN_0_255_RX_CFG (0x420000)
+#define NIC_PF_CHAN_0_255_SW_XOFF (0x440000)
+#define NIC_PF_CHAN_0_255_CREDIT (0x460000)
+#define NIC_PF_CHAN_0_255_RX_BP_CFG (0x480000)
+#define NIC_PF_SW_SYNC_RX (0x490000)
+#define NIC_PF_SW_SYNC_RX_DONE (0x490008)
+#define NIC_PF_TL2_0_63_CFG (0x500000)
+#define NIC_PF_TL2_0_63_PRI (0x520000)
+#define NIC_PF_TL2_LMAC (0x540000)
+#define NIC_PF_TL2_0_63_SH_STATUS (0x580000)
+#define NIC_PF_TL3A_0_63_CFG (0x5F0000)
+#define NIC_PF_TL3_0_255_CFG (0x600000)
+#define NIC_PF_TL3_0_255_CHAN (0x620000)
+#define NIC_PF_TL3_0_255_PIR (0x640000)
+#define NIC_PF_TL3_0_255_SW_XOFF (0x660000)
+#define NIC_PF_TL3_0_255_CNM_RATE (0x680000)
+#define NIC_PF_TL3_0_255_SH_STATUS (0x6A0000)
+#define NIC_PF_TL4A_0_255_CFG (0x6F0000)
+#define NIC_PF_TL4_0_1023_CFG (0x800000)
+#define NIC_PF_TL4_0_1023_SW_XOFF (0x820000)
+#define NIC_PF_TL4_0_1023_SH_STATUS (0x840000)
+#define NIC_PF_TL4A_0_1023_CNM_RATE (0x880000)
+#define NIC_PF_TL4A_0_1023_CNM_STATUS (0x8A0000)
+#define NIC_PF_VF_0_127_MAILBOX_0_1 (0x20002030)
+#define NIC_PF_VNIC_0_127_TX_STAT_0_4 (0x20004000)
+#define NIC_PF_VNIC_0_127_RX_STAT_0_13 (0x20004100)
+#define NIC_PF_QSET_0_127_LOCK_0_15 (0x20006000)
+#define NIC_PF_QSET_0_127_CFG (0x20010000)
+#define NIC_PF_QSET_0_127_RQ_0_7_CFG (0x20010400)
+#define NIC_PF_QSET_0_127_RQ_0_7_DROP_CFG (0x20010420)
+#define NIC_PF_QSET_0_127_RQ_0_7_BP_CFG (0x20010500)
+#define NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1 (0x20010600)
+#define NIC_PF_QSET_0_127_SQ_0_7_CFG (0x20010C00)
+#define NIC_PF_QSET_0_127_SQ_0_7_CFG2 (0x20010C08)
+#define NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1 (0x20010D00)
+
+#define NIC_PF_MSIX_VEC_0_18_ADDR (0x000000)
+#define NIC_PF_MSIX_VEC_0_CTL (0x000008)
+#define NIC_PF_MSIX_PBA_0 (0x0F0000)
+
+/* Virtual function register offsets */
+#define NIC_VNIC_CFG (0x000020)
+#define NIC_VF_PF_MAILBOX_0_1 (0x000130)
+#define NIC_VF_INT (0x000200)
+#define NIC_VF_INT_W1S (0x000220)
+#define NIC_VF_ENA_W1C (0x000240)
+#define NIC_VF_ENA_W1S (0x000260)
+
+#define NIC_VNIC_RSS_CFG (0x0020E0)
+#define NIC_VNIC_RSS_KEY_0_4 (0x002200)
+#define NIC_VNIC_TX_STAT_0_4 (0x004000)
+#define NIC_VNIC_RX_STAT_0_13 (0x004100)
+#define NIC_QSET_RQ_GEN_CFG (0x010010)
+
+#define NIC_QSET_CQ_0_7_CFG (0x010400)
+#define NIC_QSET_CQ_0_7_CFG2 (0x010408)
+#define NIC_QSET_CQ_0_7_THRESH (0x010410)
+#define NIC_QSET_CQ_0_7_BASE (0x010420)
+#define NIC_QSET_CQ_0_7_HEAD (0x010428)
+#define NIC_QSET_CQ_0_7_TAIL (0x010430)
+#define NIC_QSET_CQ_0_7_DOOR (0x010438)
+#define NIC_QSET_CQ_0_7_STATUS (0x010440)
+#define NIC_QSET_CQ_0_7_STATUS2 (0x010448)
+#define NIC_QSET_CQ_0_7_DEBUG (0x010450)
+
+#define NIC_QSET_RQ_0_7_CFG (0x010600)
+#define NIC_QSET_RQ_0_7_STAT_0_1 (0x010700)
+
+#define NIC_QSET_SQ_0_7_CFG (0x010800)
+#define NIC_QSET_SQ_0_7_THRESH (0x010810)
+#define NIC_QSET_SQ_0_7_BASE (0x010820)
+#define NIC_QSET_SQ_0_7_HEAD (0x010828)
+#define NIC_QSET_SQ_0_7_TAIL (0x010830)
+#define NIC_QSET_SQ_0_7_DOOR (0x010838)
+#define NIC_QSET_SQ_0_7_STATUS (0x010840)
+#define NIC_QSET_SQ_0_7_DEBUG (0x010848)
+#define NIC_QSET_SQ_0_7_STAT_0_1 (0x010900)
+
+#define NIC_QSET_RBDR_0_1_CFG (0x010C00)
+#define NIC_QSET_RBDR_0_1_THRESH (0x010C10)
+#define NIC_QSET_RBDR_0_1_BASE (0x010C20)
+#define NIC_QSET_RBDR_0_1_HEAD (0x010C28)
+#define NIC_QSET_RBDR_0_1_TAIL (0x010C30)
+#define NIC_QSET_RBDR_0_1_DOOR (0x010C38)
+#define NIC_QSET_RBDR_0_1_STATUS0 (0x010C40)
+#define NIC_QSET_RBDR_0_1_STATUS1 (0x010C48)
+#define NIC_QSET_RBDR_0_1_PREFETCH_STATUS (0x010C50)
+
+#define NIC_VF_MSIX_VECTOR_0_19_ADDR (0x000000)
+#define NIC_VF_MSIX_VECTOR_0_19_CTL (0x000008)
+#define NIC_VF_MSIX_PBA (0x0F0000)
+
+/* Offsets within registers */
+#define NIC_MSIX_VEC_SHIFT 4
+#define NIC_Q_NUM_SHIFT 18
+#define NIC_QS_ID_SHIFT 21
+#define NIC_VF_NUM_SHIFT 21
+
+/* Port kind configuration register */
+struct pkind_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 reserved_42_63:22;
+ u64 hdr_sl:5; /* Header skip length */
+ u64 rx_hdr:3; /* TNS Receive header present */
+ u64 lenerr_en:1;/* L2 length error check enable */
+ u64 reserved_32_32:1;
+ u64 maxlen:16; /* Max frame size */
+ u64 minlen:16; /* Min frame size */
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 minlen:16;
+ u64 maxlen:16;
+ u64 reserved_32_32:1;
+ u64 lenerr_en:1;
+ u64 rx_hdr:3;
+ u64 hdr_sl:5;
+ u64 reserved_42_63:22;
+#endif
+};
+
+#endif /* NIC_REG_H */
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
new file mode 100644
index 000000000..92ba958e2
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -0,0 +1,885 @@
+/*
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+/* ETHTOOL Support for VNIC_VF Device*/
+
+#include <linux/pci.h>
+#include <linux/net_tstamp.h>
+
+#include "nic_reg.h"
+#include "nic.h"
+#include "nicvf_queues.h"
+#include "q_struct.h"
+#include "thunder_bgx.h"
+#include "../common/cavium_ptp.h"
+
+#define DRV_NAME "nicvf"
+#define DRV_VERSION "1.0"
+
+struct nicvf_stat {
+ char name[ETH_GSTRING_LEN];
+ unsigned int index;
+};
+
+#define NICVF_HW_STAT(stat) { \
+ .name = #stat, \
+ .index = offsetof(struct nicvf_hw_stats, stat) / sizeof(u64), \
+}
+
+#define NICVF_DRV_STAT(stat) { \
+ .name = #stat, \
+ .index = offsetof(struct nicvf_drv_stats, stat) / sizeof(u64), \
+}
+
+static const struct nicvf_stat nicvf_hw_stats[] = {
+ NICVF_HW_STAT(rx_bytes),
+ NICVF_HW_STAT(rx_frames),
+ NICVF_HW_STAT(rx_ucast_frames),
+ NICVF_HW_STAT(rx_bcast_frames),
+ NICVF_HW_STAT(rx_mcast_frames),
+ NICVF_HW_STAT(rx_drops),
+ NICVF_HW_STAT(rx_drop_red),
+ NICVF_HW_STAT(rx_drop_red_bytes),
+ NICVF_HW_STAT(rx_drop_overrun),
+ NICVF_HW_STAT(rx_drop_overrun_bytes),
+ NICVF_HW_STAT(rx_drop_bcast),
+ NICVF_HW_STAT(rx_drop_mcast),
+ NICVF_HW_STAT(rx_drop_l3_bcast),
+ NICVF_HW_STAT(rx_drop_l3_mcast),
+ NICVF_HW_STAT(rx_fcs_errors),
+ NICVF_HW_STAT(rx_l2_errors),
+ NICVF_HW_STAT(tx_bytes),
+ NICVF_HW_STAT(tx_frames),
+ NICVF_HW_STAT(tx_ucast_frames),
+ NICVF_HW_STAT(tx_bcast_frames),
+ NICVF_HW_STAT(tx_mcast_frames),
+ NICVF_HW_STAT(tx_drops),
+};
+
+static const struct nicvf_stat nicvf_drv_stats[] = {
+ NICVF_DRV_STAT(rx_bgx_truncated_pkts),
+ NICVF_DRV_STAT(rx_jabber_errs),
+ NICVF_DRV_STAT(rx_fcs_errs),
+ NICVF_DRV_STAT(rx_bgx_errs),
+ NICVF_DRV_STAT(rx_prel2_errs),
+ NICVF_DRV_STAT(rx_l2_hdr_malformed),
+ NICVF_DRV_STAT(rx_oversize),
+ NICVF_DRV_STAT(rx_undersize),
+ NICVF_DRV_STAT(rx_l2_len_mismatch),
+ NICVF_DRV_STAT(rx_l2_pclp),
+ NICVF_DRV_STAT(rx_ip_ver_errs),
+ NICVF_DRV_STAT(rx_ip_csum_errs),
+ NICVF_DRV_STAT(rx_ip_hdr_malformed),
+ NICVF_DRV_STAT(rx_ip_payload_malformed),
+ NICVF_DRV_STAT(rx_ip_ttl_errs),
+ NICVF_DRV_STAT(rx_l3_pclp),
+ NICVF_DRV_STAT(rx_l4_malformed),
+ NICVF_DRV_STAT(rx_l4_csum_errs),
+ NICVF_DRV_STAT(rx_udp_len_errs),
+ NICVF_DRV_STAT(rx_l4_port_errs),
+ NICVF_DRV_STAT(rx_tcp_flag_errs),
+ NICVF_DRV_STAT(rx_tcp_offset_errs),
+ NICVF_DRV_STAT(rx_l4_pclp),
+ NICVF_DRV_STAT(rx_truncated_pkts),
+
+ NICVF_DRV_STAT(tx_desc_fault),
+ NICVF_DRV_STAT(tx_hdr_cons_err),
+ NICVF_DRV_STAT(tx_subdesc_err),
+ NICVF_DRV_STAT(tx_max_size_exceeded),
+ NICVF_DRV_STAT(tx_imm_size_oflow),
+ NICVF_DRV_STAT(tx_data_seq_err),
+ NICVF_DRV_STAT(tx_mem_seq_err),
+ NICVF_DRV_STAT(tx_lock_viol),
+ NICVF_DRV_STAT(tx_data_fault),
+ NICVF_DRV_STAT(tx_tstmp_conflict),
+ NICVF_DRV_STAT(tx_tstmp_timeout),
+ NICVF_DRV_STAT(tx_mem_fault),
+ NICVF_DRV_STAT(tx_csum_overlap),
+ NICVF_DRV_STAT(tx_csum_overflow),
+
+ NICVF_DRV_STAT(tx_tso),
+ NICVF_DRV_STAT(tx_timeout),
+ NICVF_DRV_STAT(txq_stop),
+ NICVF_DRV_STAT(txq_wake),
+ NICVF_DRV_STAT(rcv_buffer_alloc_failures),
+ NICVF_DRV_STAT(page_alloc),
+};
+
+static const struct nicvf_stat nicvf_queue_stats[] = {
+ { "bytes", 0 },
+ { "frames", 1 },
+};
+
+static const unsigned int nicvf_n_hw_stats = ARRAY_SIZE(nicvf_hw_stats);
+static const unsigned int nicvf_n_drv_stats = ARRAY_SIZE(nicvf_drv_stats);
+static const unsigned int nicvf_n_queue_stats = ARRAY_SIZE(nicvf_queue_stats);
+
+static int nicvf_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ u32 supported, advertising;
+
+ supported = 0;
+ advertising = 0;
+
+ if (!nic->link_up) {
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ cmd->base.speed = SPEED_UNKNOWN;
+ return 0;
+ }
+
+ switch (nic->speed) {
+ case SPEED_1000:
+ cmd->base.port = PORT_MII | PORT_TP;
+ cmd->base.autoneg = AUTONEG_ENABLE;
+ supported |= SUPPORTED_MII | SUPPORTED_TP;
+ supported |= SUPPORTED_1000baseT_Full |
+ SUPPORTED_1000baseT_Half |
+ SUPPORTED_100baseT_Full |
+ SUPPORTED_100baseT_Half |
+ SUPPORTED_10baseT_Full |
+ SUPPORTED_10baseT_Half;
+ supported |= SUPPORTED_Autoneg;
+ advertising |= ADVERTISED_1000baseT_Full |
+ ADVERTISED_1000baseT_Half |
+ ADVERTISED_100baseT_Full |
+ ADVERTISED_100baseT_Half |
+ ADVERTISED_10baseT_Full |
+ ADVERTISED_10baseT_Half;
+ break;
+ case SPEED_10000:
+ if (nic->mac_type == BGX_MODE_RXAUI) {
+ cmd->base.port = PORT_TP;
+ supported |= SUPPORTED_TP;
+ } else {
+ cmd->base.port = PORT_FIBRE;
+ supported |= SUPPORTED_FIBRE;
+ }
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ supported |= SUPPORTED_10000baseT_Full;
+ break;
+ case SPEED_40000:
+ cmd->base.port = PORT_FIBRE;
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ supported |= SUPPORTED_FIBRE;
+ supported |= SUPPORTED_40000baseCR4_Full;
+ break;
+ }
+ cmd->base.duplex = nic->duplex;
+ cmd->base.speed = nic->speed;
+
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+ supported);
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+ advertising);
+
+ return 0;
+}
+
+static u32 nicvf_get_link(struct net_device *netdev)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ return nic->link_up;
+}
+
+static void nicvf_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *info)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
+ strlcpy(info->version, DRV_VERSION, sizeof(info->version));
+ strlcpy(info->bus_info, pci_name(nic->pdev), sizeof(info->bus_info));
+}
+
+static u32 nicvf_get_msglevel(struct net_device *netdev)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ return nic->msg_enable;
+}
+
+static void nicvf_set_msglevel(struct net_device *netdev, u32 lvl)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ nic->msg_enable = lvl;
+}
+
+static void nicvf_get_qset_strings(struct nicvf *nic, u8 **data, int qset)
+{
+ int stats, qidx;
+ int start_qidx = qset * MAX_RCV_QUEUES_PER_QS;
+
+ for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
+ for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
+ sprintf(*data, "rxq%d: %s", qidx + start_qidx,
+ nicvf_queue_stats[stats].name);
+ *data += ETH_GSTRING_LEN;
+ }
+ }
+
+ for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
+ for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
+ sprintf(*data, "txq%d: %s", qidx + start_qidx,
+ nicvf_queue_stats[stats].name);
+ *data += ETH_GSTRING_LEN;
+ }
+ }
+}
+
+static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ int stats;
+ int sqs;
+
+ if (sset != ETH_SS_STATS)
+ return;
+
+ for (stats = 0; stats < nicvf_n_hw_stats; stats++) {
+ memcpy(data, nicvf_hw_stats[stats].name, ETH_GSTRING_LEN);
+ data += ETH_GSTRING_LEN;
+ }
+
+ for (stats = 0; stats < nicvf_n_drv_stats; stats++) {
+ memcpy(data, nicvf_drv_stats[stats].name, ETH_GSTRING_LEN);
+ data += ETH_GSTRING_LEN;
+ }
+
+ nicvf_get_qset_strings(nic, &data, 0);
+
+ for (sqs = 0; sqs < nic->sqs_count; sqs++) {
+ if (!nic->snicvf[sqs])
+ continue;
+ nicvf_get_qset_strings(nic->snicvf[sqs], &data, sqs + 1);
+ }
+
+ for (stats = 0; stats < BGX_RX_STATS_COUNT; stats++) {
+ sprintf(data, "bgx_rxstat%d: ", stats);
+ data += ETH_GSTRING_LEN;
+ }
+
+ for (stats = 0; stats < BGX_TX_STATS_COUNT; stats++) {
+ sprintf(data, "bgx_txstat%d: ", stats);
+ data += ETH_GSTRING_LEN;
+ }
+}
+
+static int nicvf_get_sset_count(struct net_device *netdev, int sset)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ int qstats_count;
+ int sqs;
+
+ if (sset != ETH_SS_STATS)
+ return -EINVAL;
+
+ qstats_count = nicvf_n_queue_stats *
+ (nic->qs->rq_cnt + nic->qs->sq_cnt);
+ for (sqs = 0; sqs < nic->sqs_count; sqs++) {
+ struct nicvf *snic;
+
+ snic = nic->snicvf[sqs];
+ if (!snic)
+ continue;
+ qstats_count += nicvf_n_queue_stats *
+ (snic->qs->rq_cnt + snic->qs->sq_cnt);
+ }
+
+ return nicvf_n_hw_stats + nicvf_n_drv_stats +
+ qstats_count +
+ BGX_RX_STATS_COUNT + BGX_TX_STATS_COUNT;
+}
+
+static void nicvf_get_qset_stats(struct nicvf *nic,
+ struct ethtool_stats *stats, u64 **data)
+{
+ int stat, qidx;
+
+ if (!nic)
+ return;
+
+ for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
+ nicvf_update_rq_stats(nic, qidx);
+ for (stat = 0; stat < nicvf_n_queue_stats; stat++)
+ *((*data)++) = ((u64 *)&nic->qs->rq[qidx].stats)
+ [nicvf_queue_stats[stat].index];
+ }
+
+ for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
+ nicvf_update_sq_stats(nic, qidx);
+ for (stat = 0; stat < nicvf_n_queue_stats; stat++)
+ *((*data)++) = ((u64 *)&nic->qs->sq[qidx].stats)
+ [nicvf_queue_stats[stat].index];
+ }
+}
+
+static void nicvf_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ int stat, tmp_stats;
+ int sqs, cpu;
+
+ nicvf_update_stats(nic);
+
+ /* Update LMAC stats */
+ nicvf_update_lmac_stats(nic);
+
+ for (stat = 0; stat < nicvf_n_hw_stats; stat++)
+ *(data++) = ((u64 *)&nic->hw_stats)
+ [nicvf_hw_stats[stat].index];
+ for (stat = 0; stat < nicvf_n_drv_stats; stat++) {
+ tmp_stats = 0;
+ for_each_possible_cpu(cpu)
+ tmp_stats += ((u64 *)per_cpu_ptr(nic->drv_stats, cpu))
+ [nicvf_drv_stats[stat].index];
+ *(data++) = tmp_stats;
+ }
+
+ nicvf_get_qset_stats(nic, stats, &data);
+
+ for (sqs = 0; sqs < nic->sqs_count; sqs++) {
+ if (!nic->snicvf[sqs])
+ continue;
+ nicvf_get_qset_stats(nic->snicvf[sqs], stats, &data);
+ }
+
+ for (stat = 0; stat < BGX_RX_STATS_COUNT; stat++)
+ *(data++) = nic->bgx_stats.rx_stats[stat];
+ for (stat = 0; stat < BGX_TX_STATS_COUNT; stat++)
+ *(data++) = nic->bgx_stats.tx_stats[stat];
+}
+
+static int nicvf_get_regs_len(struct net_device *dev)
+{
+ return sizeof(u64) * NIC_VF_REG_COUNT;
+}
+
+static void nicvf_get_regs(struct net_device *dev,
+ struct ethtool_regs *regs, void *reg)
+{
+ struct nicvf *nic = netdev_priv(dev);
+ u64 *p = (u64 *)reg;
+ u64 reg_offset;
+ int mbox, key, stat, q;
+ int i = 0;
+
+ regs->version = 0;
+ memset(p, 0, NIC_VF_REG_COUNT);
+
+ p[i++] = nicvf_reg_read(nic, NIC_VNIC_CFG);
+ /* Mailbox registers */
+ for (mbox = 0; mbox < NIC_PF_VF_MAILBOX_SIZE; mbox++)
+ p[i++] = nicvf_reg_read(nic,
+ NIC_VF_PF_MAILBOX_0_1 | (mbox << 3));
+
+ p[i++] = nicvf_reg_read(nic, NIC_VF_INT);
+ p[i++] = nicvf_reg_read(nic, NIC_VF_INT_W1S);
+ p[i++] = nicvf_reg_read(nic, NIC_VF_ENA_W1C);
+ p[i++] = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
+ p[i++] = nicvf_reg_read(nic, NIC_VNIC_RSS_CFG);
+
+ for (key = 0; key < RSS_HASH_KEY_SIZE; key++)
+ p[i++] = nicvf_reg_read(nic, NIC_VNIC_RSS_KEY_0_4 | (key << 3));
+
+ /* Tx/Rx statistics */
+ for (stat = 0; stat < TX_STATS_ENUM_LAST; stat++)
+ p[i++] = nicvf_reg_read(nic,
+ NIC_VNIC_TX_STAT_0_4 | (stat << 3));
+
+ for (i = 0; i < RX_STATS_ENUM_LAST; i++)
+ p[i++] = nicvf_reg_read(nic,
+ NIC_VNIC_RX_STAT_0_13 | (stat << 3));
+
+ p[i++] = nicvf_reg_read(nic, NIC_QSET_RQ_GEN_CFG);
+
+ /* All completion queue's registers */
+ for (q = 0; q < MAX_CMP_QUEUES_PER_QS; q++) {
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_CFG, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_CFG2, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_THRESH, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_BASE, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_TAIL, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_DOOR, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS2, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_DEBUG, q);
+ }
+
+ /* All receive queue's registers */
+ for (q = 0; q < MAX_RCV_QUEUES_PER_QS; q++) {
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RQ_0_7_CFG, q);
+ p[i++] = nicvf_queue_reg_read(nic,
+ NIC_QSET_RQ_0_7_STAT_0_1, q);
+ reg_offset = NIC_QSET_RQ_0_7_STAT_0_1 | (1 << 3);
+ p[i++] = nicvf_queue_reg_read(nic, reg_offset, q);
+ }
+
+ for (q = 0; q < MAX_SND_QUEUES_PER_QS; q++) {
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_THRESH, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_BASE, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DOOR, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STATUS, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DEBUG, q);
+ /* Padding, was NIC_QSET_SQ_0_7_CNM_CHG, which
+ * produces bus errors when read
+ */
+ p[i++] = 0;
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1, q);
+ reg_offset = NIC_QSET_SQ_0_7_STAT_0_1 | (1 << 3);
+ p[i++] = nicvf_queue_reg_read(nic, reg_offset, q);
+ }
+
+ for (q = 0; q < MAX_RCV_BUF_DESC_RINGS_PER_QS; q++) {
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_CFG, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_THRESH, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_BASE, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_HEAD, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_DOOR, q);
+ p[i++] = nicvf_queue_reg_read(nic,
+ NIC_QSET_RBDR_0_1_STATUS0, q);
+ p[i++] = nicvf_queue_reg_read(nic,
+ NIC_QSET_RBDR_0_1_STATUS1, q);
+ reg_offset = NIC_QSET_RBDR_0_1_PREFETCH_STATUS;
+ p[i++] = nicvf_queue_reg_read(nic, reg_offset, q);
+ }
+}
+
+static int nicvf_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *cmd)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ cmd->rx_coalesce_usecs = nic->cq_coalesce_usecs;
+ return 0;
+}
+
+static void nicvf_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ struct queue_set *qs = nic->qs;
+
+ ring->rx_max_pending = MAX_CMP_QUEUE_LEN;
+ ring->rx_pending = qs->cq_len;
+ ring->tx_max_pending = MAX_SND_QUEUE_LEN;
+ ring->tx_pending = qs->sq_len;
+}
+
+static int nicvf_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ struct queue_set *qs = nic->qs;
+ u32 rx_count, tx_count;
+
+ /* Due to HW errata this is not supported on T88 pass 1.x silicon */
+ if (pass1_silicon(nic->pdev))
+ return -EINVAL;
+
+ if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+ return -EINVAL;
+
+ tx_count = clamp_t(u32, ring->tx_pending,
+ MIN_SND_QUEUE_LEN, MAX_SND_QUEUE_LEN);
+ rx_count = clamp_t(u32, ring->rx_pending,
+ MIN_CMP_QUEUE_LEN, MAX_CMP_QUEUE_LEN);
+
+ if ((tx_count == qs->sq_len) && (rx_count == qs->cq_len))
+ return 0;
+
+ /* Permitted lengths are 1K, 2K, 4K, 8K, 16K, 32K, 64K */
+ qs->sq_len = rounddown_pow_of_two(tx_count);
+ qs->cq_len = rounddown_pow_of_two(rx_count);
+
+ if (netif_running(netdev)) {
+ nicvf_stop(netdev);
+ nicvf_open(netdev);
+ }
+
+ return 0;
+}
+
+static int nicvf_get_rss_hash_opts(struct nicvf *nic,
+ struct ethtool_rxnfc *info)
+{
+ info->data = 0;
+
+ switch (info->flow_type) {
+ case TCP_V4_FLOW:
+ case TCP_V6_FLOW:
+ case UDP_V4_FLOW:
+ case UDP_V6_FLOW:
+ case SCTP_V4_FLOW:
+ case SCTP_V6_FLOW:
+ info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* Fall through */
+ case IPV4_FLOW:
+ case IPV6_FLOW:
+ info->data |= RXH_IP_SRC | RXH_IP_DST;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nicvf_get_rxnfc(struct net_device *dev,
+ struct ethtool_rxnfc *info, u32 *rules)
+{
+ struct nicvf *nic = netdev_priv(dev);
+ int ret = -EOPNOTSUPP;
+
+ switch (info->cmd) {
+ case ETHTOOL_GRXRINGS:
+ info->data = nic->rx_queues;
+ ret = 0;
+ break;
+ case ETHTOOL_GRXFH:
+ return nicvf_get_rss_hash_opts(nic, info);
+ default:
+ break;
+ }
+ return ret;
+}
+
+static int nicvf_set_rss_hash_opts(struct nicvf *nic,
+ struct ethtool_rxnfc *info)
+{
+ struct nicvf_rss_info *rss = &nic->rss_info;
+ u64 rss_cfg = nicvf_reg_read(nic, NIC_VNIC_RSS_CFG);
+
+ if (!rss->enable)
+ netdev_err(nic->netdev,
+ "RSS is disabled, hash cannot be set\n");
+
+ netdev_info(nic->netdev, "Set RSS flow type = %d, data = %lld\n",
+ info->flow_type, info->data);
+
+ if (!(info->data & RXH_IP_SRC) || !(info->data & RXH_IP_DST))
+ return -EINVAL;
+
+ switch (info->flow_type) {
+ case TCP_V4_FLOW:
+ case TCP_V6_FLOW:
+ switch (info->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ case 0:
+ rss_cfg &= ~(1ULL << RSS_HASH_TCP);
+ break;
+ case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+ rss_cfg |= (1ULL << RSS_HASH_TCP);
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case UDP_V4_FLOW:
+ case UDP_V6_FLOW:
+ switch (info->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ case 0:
+ rss_cfg &= ~(1ULL << RSS_HASH_UDP);
+ break;
+ case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+ rss_cfg |= (1ULL << RSS_HASH_UDP);
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case SCTP_V4_FLOW:
+ case SCTP_V6_FLOW:
+ switch (info->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ case 0:
+ rss_cfg &= ~(1ULL << RSS_HASH_L4ETC);
+ break;
+ case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+ rss_cfg |= (1ULL << RSS_HASH_L4ETC);
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case IPV4_FLOW:
+ case IPV6_FLOW:
+ rss_cfg = RSS_HASH_IP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss_cfg);
+ return 0;
+}
+
+static int nicvf_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
+{
+ struct nicvf *nic = netdev_priv(dev);
+
+ switch (info->cmd) {
+ case ETHTOOL_SRXFH:
+ return nicvf_set_rss_hash_opts(nic, info);
+ default:
+ break;
+ }
+ return -EOPNOTSUPP;
+}
+
+static u32 nicvf_get_rxfh_key_size(struct net_device *netdev)
+{
+ return RSS_HASH_KEY_SIZE * sizeof(u64);
+}
+
+static u32 nicvf_get_rxfh_indir_size(struct net_device *dev)
+{
+ struct nicvf *nic = netdev_priv(dev);
+
+ return nic->rss_info.rss_size;
+}
+
+static int nicvf_get_rxfh(struct net_device *dev, u32 *indir, u8 *hkey,
+ u8 *hfunc)
+{
+ struct nicvf *nic = netdev_priv(dev);
+ struct nicvf_rss_info *rss = &nic->rss_info;
+ int idx;
+
+ if (indir) {
+ for (idx = 0; idx < rss->rss_size; idx++)
+ indir[idx] = rss->ind_tbl[idx];
+ }
+
+ if (hkey)
+ memcpy(hkey, rss->key, RSS_HASH_KEY_SIZE * sizeof(u64));
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_TOP;
+
+ return 0;
+}
+
+static int nicvf_set_rxfh(struct net_device *dev, const u32 *indir,
+ const u8 *hkey, const u8 hfunc)
+{
+ struct nicvf *nic = netdev_priv(dev);
+ struct nicvf_rss_info *rss = &nic->rss_info;
+ int idx;
+
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+ return -EOPNOTSUPP;
+
+ if (!rss->enable) {
+ netdev_err(nic->netdev,
+ "RSS is disabled, cannot change settings\n");
+ return -EIO;
+ }
+
+ if (indir) {
+ for (idx = 0; idx < rss->rss_size; idx++)
+ rss->ind_tbl[idx] = indir[idx];
+ }
+
+ if (hkey) {
+ memcpy(rss->key, hkey, RSS_HASH_KEY_SIZE * sizeof(u64));
+ nicvf_set_rss_key(nic);
+ }
+
+ nicvf_config_rss(nic);
+ return 0;
+}
+
+/* Get no of queues device supports and current queue count */
+static void nicvf_get_channels(struct net_device *dev,
+ struct ethtool_channels *channel)
+{
+ struct nicvf *nic = netdev_priv(dev);
+
+ memset(channel, 0, sizeof(*channel));
+
+ channel->max_rx = nic->max_queues;
+ channel->max_tx = nic->max_queues;
+
+ channel->rx_count = nic->rx_queues;
+ channel->tx_count = nic->tx_queues;
+}
+
+/* Set no of Tx, Rx queues to be used */
+static int nicvf_set_channels(struct net_device *dev,
+ struct ethtool_channels *channel)
+{
+ struct nicvf *nic = netdev_priv(dev);
+ int err = 0;
+ bool if_up = netif_running(dev);
+ u8 cqcount, txq_count;
+
+ if (!channel->rx_count || !channel->tx_count)
+ return -EINVAL;
+ if (channel->rx_count > nic->max_queues)
+ return -EINVAL;
+ if (channel->tx_count > nic->max_queues)
+ return -EINVAL;
+
+ if (nic->xdp_prog &&
+ ((channel->tx_count + channel->rx_count) > nic->max_queues)) {
+ netdev_err(nic->netdev,
+ "XDP mode, RXQs + TXQs > Max %d\n",
+ nic->max_queues);
+ return -EINVAL;
+ }
+
+ if (if_up)
+ nicvf_stop(dev);
+
+ nic->rx_queues = channel->rx_count;
+ nic->tx_queues = channel->tx_count;
+ if (!nic->xdp_prog)
+ nic->xdp_tx_queues = 0;
+ else
+ nic->xdp_tx_queues = channel->rx_count;
+
+ txq_count = nic->xdp_tx_queues + nic->tx_queues;
+ cqcount = max(nic->rx_queues, txq_count);
+
+ if (cqcount > MAX_CMP_QUEUES_PER_QS) {
+ nic->sqs_count = roundup(cqcount, MAX_CMP_QUEUES_PER_QS);
+ nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1;
+ } else {
+ nic->sqs_count = 0;
+ }
+
+ nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS);
+ nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS);
+ nic->qs->cq_cnt = max(nic->qs->rq_cnt, nic->qs->sq_cnt);
+
+ err = nicvf_set_real_num_queues(dev, nic->tx_queues, nic->rx_queues);
+ if (err)
+ return err;
+
+ if (if_up)
+ nicvf_open(dev);
+
+ netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n",
+ nic->tx_queues, nic->rx_queues);
+
+ return err;
+}
+
+static void nicvf_get_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct nicvf *nic = netdev_priv(dev);
+ union nic_mbx mbx = {};
+
+ /* Supported only for 10G/40G interfaces */
+ if ((nic->mac_type == BGX_MODE_SGMII) ||
+ (nic->mac_type == BGX_MODE_QSGMII) ||
+ (nic->mac_type == BGX_MODE_RGMII))
+ return;
+
+ mbx.pfc.msg = NIC_MBOX_MSG_PFC;
+ mbx.pfc.get = 1;
+ if (!nicvf_send_msg_to_pf(nic, &mbx)) {
+ pause->autoneg = nic->pfc.autoneg;
+ pause->rx_pause = nic->pfc.fc_rx;
+ pause->tx_pause = nic->pfc.fc_tx;
+ }
+}
+
+static int nicvf_set_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct nicvf *nic = netdev_priv(dev);
+ union nic_mbx mbx = {};
+
+ /* Supported only for 10G/40G interfaces */
+ if ((nic->mac_type == BGX_MODE_SGMII) ||
+ (nic->mac_type == BGX_MODE_QSGMII) ||
+ (nic->mac_type == BGX_MODE_RGMII))
+ return -EOPNOTSUPP;
+
+ if (pause->autoneg)
+ return -EOPNOTSUPP;
+
+ mbx.pfc.msg = NIC_MBOX_MSG_PFC;
+ mbx.pfc.get = 0;
+ mbx.pfc.fc_rx = pause->rx_pause;
+ mbx.pfc.fc_tx = pause->tx_pause;
+ if (nicvf_send_msg_to_pf(nic, &mbx))
+ return -EAGAIN;
+
+ nic->pfc.fc_rx = pause->rx_pause;
+ nic->pfc.fc_tx = pause->tx_pause;
+
+ return 0;
+}
+
+static int nicvf_get_ts_info(struct net_device *netdev,
+ struct ethtool_ts_info *info)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ if (!nic->ptp_clock)
+ return ethtool_op_get_ts_info(netdev, info);
+
+ info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->phc_index = cavium_ptp_clock_index(nic->ptp_clock);
+
+ info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON);
+
+ info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_ALL);
+
+ return 0;
+}
+
+static const struct ethtool_ops nicvf_ethtool_ops = {
+ .get_link = nicvf_get_link,
+ .get_drvinfo = nicvf_get_drvinfo,
+ .get_msglevel = nicvf_get_msglevel,
+ .set_msglevel = nicvf_set_msglevel,
+ .get_strings = nicvf_get_strings,
+ .get_sset_count = nicvf_get_sset_count,
+ .get_ethtool_stats = nicvf_get_ethtool_stats,
+ .get_regs_len = nicvf_get_regs_len,
+ .get_regs = nicvf_get_regs,
+ .get_coalesce = nicvf_get_coalesce,
+ .get_ringparam = nicvf_get_ringparam,
+ .set_ringparam = nicvf_set_ringparam,
+ .get_rxnfc = nicvf_get_rxnfc,
+ .set_rxnfc = nicvf_set_rxnfc,
+ .get_rxfh_key_size = nicvf_get_rxfh_key_size,
+ .get_rxfh_indir_size = nicvf_get_rxfh_indir_size,
+ .get_rxfh = nicvf_get_rxfh,
+ .set_rxfh = nicvf_set_rxfh,
+ .get_channels = nicvf_get_channels,
+ .set_channels = nicvf_set_channels,
+ .get_pauseparam = nicvf_get_pauseparam,
+ .set_pauseparam = nicvf_set_pauseparam,
+ .get_ts_info = nicvf_get_ts_info,
+ .get_link_ksettings = nicvf_get_link_ksettings,
+};
+
+void nicvf_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &nicvf_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
new file mode 100644
index 000000000..0fbb0dee2
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -0,0 +1,2310 @@
+/*
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/log2.h>
+#include <linux/prefetch.h>
+#include <linux/irq.h>
+#include <linux/iommu.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/filter.h>
+#include <linux/net_tstamp.h>
+#include <linux/workqueue.h>
+
+#include "nic_reg.h"
+#include "nic.h"
+#include "nicvf_queues.h"
+#include "thunder_bgx.h"
+#include "../common/cavium_ptp.h"
+
+#define DRV_NAME "nicvf"
+#define DRV_VERSION "1.0"
+
+/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs
+ * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed
+ * this value, keeping headroom for the 14 byte Ethernet header and two
+ * VLAN tags (for QinQ)
+ */
+#define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2)
+
+/* Supported devices */
+static const struct pci_device_id nicvf_id_table[] = {
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+ PCI_DEVICE_ID_THUNDER_NIC_VF,
+ PCI_VENDOR_ID_CAVIUM,
+ PCI_SUBSYS_DEVID_88XX_NIC_VF) },
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+ PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
+ PCI_VENDOR_ID_CAVIUM,
+ PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) },
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+ PCI_DEVICE_ID_THUNDER_NIC_VF,
+ PCI_VENDOR_ID_CAVIUM,
+ PCI_SUBSYS_DEVID_81XX_NIC_VF) },
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+ PCI_DEVICE_ID_THUNDER_NIC_VF,
+ PCI_VENDOR_ID_CAVIUM,
+ PCI_SUBSYS_DEVID_83XX_NIC_VF) },
+ { 0, } /* end of table */
+};
+
+MODULE_AUTHOR("Sunil Goutham");
+MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, nicvf_id_table);
+
+static int debug = 0x00;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "Debug message level bitmap");
+
+static int cpi_alg = CPI_ALG_NONE;
+module_param(cpi_alg, int, 0444);
+MODULE_PARM_DESC(cpi_alg,
+ "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
+
+/* workqueue for handling kernel ndo_set_rx_mode() calls */
+static struct workqueue_struct *nicvf_rx_mode_wq;
+
+static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
+{
+ if (nic->sqs_mode)
+ return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS);
+ else
+ return qidx;
+}
+
+/* The Cavium ThunderX network controller can *only* be found in SoCs
+ * containing the ThunderX ARM64 CPU implementation. All accesses to the device
+ * registers on this platform are implicitly strongly ordered with respect
+ * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
+ * with no memory barriers in this driver. The readq()/writeq() functions add
+ * explicit ordering operation which in this case are redundant, and only
+ * add overhead.
+ */
+
+/* Register read/write APIs */
+void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val)
+{
+ writeq_relaxed(val, nic->reg_base + offset);
+}
+
+u64 nicvf_reg_read(struct nicvf *nic, u64 offset)
+{
+ return readq_relaxed(nic->reg_base + offset);
+}
+
+void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
+ u64 qidx, u64 val)
+{
+ void __iomem *addr = nic->reg_base + offset;
+
+ writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT));
+}
+
+u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx)
+{
+ void __iomem *addr = nic->reg_base + offset;
+
+ return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT));
+}
+
+/* VF -> PF mailbox communication */
+static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
+{
+ u64 *msg = (u64 *)mbx;
+
+ nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]);
+ nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]);
+}
+
+int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
+{
+ int timeout = NIC_MBOX_MSG_TIMEOUT;
+ int sleep = 10;
+
+ nic->pf_acked = false;
+ nic->pf_nacked = false;
+
+ nicvf_write_to_mbx(nic, mbx);
+
+ /* Wait for previous message to be acked, timeout 2sec */
+ while (!nic->pf_acked) {
+ if (nic->pf_nacked) {
+ netdev_err(nic->netdev,
+ "PF NACK to mbox msg 0x%02x from VF%d\n",
+ (mbx->msg.msg & 0xFF), nic->vf_id);
+ return -EINVAL;
+ }
+ msleep(sleep);
+ if (nic->pf_acked)
+ break;
+ timeout -= sleep;
+ if (!timeout) {
+ netdev_err(nic->netdev,
+ "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
+ (mbx->msg.msg & 0xFF), nic->vf_id);
+ return -EBUSY;
+ }
+ }
+ return 0;
+}
+
+/* Checks if VF is able to comminicate with PF
+* and also gets the VNIC number this VF is associated to.
+*/
+static int nicvf_check_pf_ready(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ mbx.msg.msg = NIC_MBOX_MSG_READY;
+ if (nicvf_send_msg_to_pf(nic, &mbx)) {
+ netdev_err(nic->netdev,
+ "PF didn't respond to READY msg\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+static void nicvf_send_cfg_done(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
+ if (nicvf_send_msg_to_pf(nic, &mbx)) {
+ netdev_err(nic->netdev,
+ "PF didn't respond to CFG DONE msg\n");
+ }
+}
+
+static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
+{
+ if (bgx->rx)
+ nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats;
+ else
+ nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats;
+}
+
+static void nicvf_handle_mbx_intr(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+ u64 *mbx_data;
+ u64 mbx_addr;
+ int i;
+
+ mbx_addr = NIC_VF_PF_MAILBOX_0_1;
+ mbx_data = (u64 *)&mbx;
+
+ for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
+ *mbx_data = nicvf_reg_read(nic, mbx_addr);
+ mbx_data++;
+ mbx_addr += sizeof(u64);
+ }
+
+ netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg);
+ switch (mbx.msg.msg) {
+ case NIC_MBOX_MSG_READY:
+ nic->pf_acked = true;
+ nic->vf_id = mbx.nic_cfg.vf_id & 0x7F;
+ nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
+ nic->node = mbx.nic_cfg.node_id;
+ if (!nic->set_mac_pending)
+ ether_addr_copy(nic->netdev->dev_addr,
+ mbx.nic_cfg.mac_addr);
+ nic->sqs_mode = mbx.nic_cfg.sqs_mode;
+ nic->loopback_supported = mbx.nic_cfg.loopback_supported;
+ nic->link_up = false;
+ nic->duplex = 0;
+ nic->speed = 0;
+ break;
+ case NIC_MBOX_MSG_ACK:
+ nic->pf_acked = true;
+ break;
+ case NIC_MBOX_MSG_NACK:
+ nic->pf_nacked = true;
+ break;
+ case NIC_MBOX_MSG_RSS_SIZE:
+ nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size;
+ nic->pf_acked = true;
+ break;
+ case NIC_MBOX_MSG_BGX_STATS:
+ nicvf_read_bgx_stats(nic, &mbx.bgx_stats);
+ nic->pf_acked = true;
+ break;
+ case NIC_MBOX_MSG_BGX_LINK_CHANGE:
+ nic->pf_acked = true;
+ nic->link_up = mbx.link_status.link_up;
+ nic->duplex = mbx.link_status.duplex;
+ nic->speed = mbx.link_status.speed;
+ nic->mac_type = mbx.link_status.mac_type;
+ if (nic->link_up) {
+ netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
+ nic->speed,
+ nic->duplex == DUPLEX_FULL ?
+ "Full" : "Half");
+ netif_carrier_on(nic->netdev);
+ netif_tx_start_all_queues(nic->netdev);
+ } else {
+ netdev_info(nic->netdev, "Link is Down\n");
+ netif_carrier_off(nic->netdev);
+ netif_tx_stop_all_queues(nic->netdev);
+ }
+ break;
+ case NIC_MBOX_MSG_ALLOC_SQS:
+ nic->sqs_count = mbx.sqs_alloc.qs_count;
+ nic->pf_acked = true;
+ break;
+ case NIC_MBOX_MSG_SNICVF_PTR:
+ /* Primary VF: make note of secondary VF's pointer
+ * to be used while packet transmission.
+ */
+ nic->snicvf[mbx.nicvf.sqs_id] =
+ (struct nicvf *)mbx.nicvf.nicvf;
+ nic->pf_acked = true;
+ break;
+ case NIC_MBOX_MSG_PNICVF_PTR:
+ /* Secondary VF/Qset: make note of primary VF's pointer
+ * to be used while packet reception, to handover packet
+ * to primary VF's netdev.
+ */
+ nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf;
+ nic->pf_acked = true;
+ break;
+ case NIC_MBOX_MSG_PFC:
+ nic->pfc.autoneg = mbx.pfc.autoneg;
+ nic->pfc.fc_rx = mbx.pfc.fc_rx;
+ nic->pfc.fc_tx = mbx.pfc.fc_tx;
+ nic->pf_acked = true;
+ break;
+ default:
+ netdev_err(nic->netdev,
+ "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
+ break;
+ }
+ nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0);
+}
+
+static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev)
+{
+ union nic_mbx mbx = {};
+
+ mbx.mac.msg = NIC_MBOX_MSG_SET_MAC;
+ mbx.mac.vf_id = nic->vf_id;
+ ether_addr_copy(mbx.mac.mac_addr, netdev->dev_addr);
+
+ return nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_config_cpi(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG;
+ mbx.cpi_cfg.vf_id = nic->vf_id;
+ mbx.cpi_cfg.cpi_alg = nic->cpi_alg;
+ mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt;
+
+ nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_get_rss_size(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
+ mbx.rss_size.vf_id = nic->vf_id;
+ nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+void nicvf_config_rss(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+ struct nicvf_rss_info *rss = &nic->rss_info;
+ int ind_tbl_len = rss->rss_size;
+ int i, nextq = 0;
+
+ mbx.rss_cfg.vf_id = nic->vf_id;
+ mbx.rss_cfg.hash_bits = rss->hash_bits;
+ while (ind_tbl_len) {
+ mbx.rss_cfg.tbl_offset = nextq;
+ mbx.rss_cfg.tbl_len = min(ind_tbl_len,
+ RSS_IND_TBL_LEN_PER_MBX_MSG);
+ mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ?
+ NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG;
+
+ for (i = 0; i < mbx.rss_cfg.tbl_len; i++)
+ mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++];
+
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ ind_tbl_len -= mbx.rss_cfg.tbl_len;
+ }
+}
+
+void nicvf_set_rss_key(struct nicvf *nic)
+{
+ struct nicvf_rss_info *rss = &nic->rss_info;
+ u64 key_addr = NIC_VNIC_RSS_KEY_0_4;
+ int idx;
+
+ for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) {
+ nicvf_reg_write(nic, key_addr, rss->key[idx]);
+ key_addr += sizeof(u64);
+ }
+}
+
+static int nicvf_rss_init(struct nicvf *nic)
+{
+ struct nicvf_rss_info *rss = &nic->rss_info;
+ int idx;
+
+ nicvf_get_rss_size(nic);
+
+ if (cpi_alg != CPI_ALG_NONE) {
+ rss->enable = false;
+ rss->hash_bits = 0;
+ return 0;
+ }
+
+ rss->enable = true;
+
+ netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64));
+ nicvf_set_rss_key(nic);
+
+ rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
+ nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg);
+
+ rss->hash_bits = ilog2(rounddown_pow_of_two(rss->rss_size));
+
+ for (idx = 0; idx < rss->rss_size; idx++)
+ rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx,
+ nic->rx_queues);
+ nicvf_config_rss(nic);
+ return 1;
+}
+
+/* Request PF to allocate additional Qsets */
+static void nicvf_request_sqs(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+ int sqs;
+ int sqs_count = nic->sqs_count;
+ int rx_queues = 0, tx_queues = 0;
+
+ /* Only primary VF should request */
+ if (nic->sqs_mode || !nic->sqs_count)
+ return;
+
+ mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
+ mbx.sqs_alloc.vf_id = nic->vf_id;
+ mbx.sqs_alloc.qs_count = nic->sqs_count;
+ if (nicvf_send_msg_to_pf(nic, &mbx)) {
+ /* No response from PF */
+ nic->sqs_count = 0;
+ return;
+ }
+
+ /* Return if no Secondary Qsets available */
+ if (!nic->sqs_count)
+ return;
+
+ if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS)
+ rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS;
+
+ tx_queues = nic->tx_queues + nic->xdp_tx_queues;
+ if (tx_queues > MAX_SND_QUEUES_PER_QS)
+ tx_queues = tx_queues - MAX_SND_QUEUES_PER_QS;
+
+ /* Set no of Rx/Tx queues in each of the SQsets */
+ for (sqs = 0; sqs < nic->sqs_count; sqs++) {
+ mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
+ mbx.nicvf.vf_id = nic->vf_id;
+ mbx.nicvf.sqs_id = sqs;
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ nic->snicvf[sqs]->sqs_id = sqs;
+ if (rx_queues > MAX_RCV_QUEUES_PER_QS) {
+ nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS;
+ rx_queues -= MAX_RCV_QUEUES_PER_QS;
+ } else {
+ nic->snicvf[sqs]->qs->rq_cnt = rx_queues;
+ rx_queues = 0;
+ }
+
+ if (tx_queues > MAX_SND_QUEUES_PER_QS) {
+ nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS;
+ tx_queues -= MAX_SND_QUEUES_PER_QS;
+ } else {
+ nic->snicvf[sqs]->qs->sq_cnt = tx_queues;
+ tx_queues = 0;
+ }
+
+ nic->snicvf[sqs]->qs->cq_cnt =
+ max(nic->snicvf[sqs]->qs->rq_cnt, nic->snicvf[sqs]->qs->sq_cnt);
+
+ /* Initialize secondary Qset's queues and its interrupts */
+ nicvf_open(nic->snicvf[sqs]->netdev);
+ }
+
+ /* Update stack with actual Rx/Tx queue count allocated */
+ if (sqs_count != nic->sqs_count)
+ nicvf_set_real_num_queues(nic->netdev,
+ nic->tx_queues, nic->rx_queues);
+}
+
+/* Send this Qset's nicvf pointer to PF.
+ * PF inturn sends primary VF's nicvf struct to secondary Qsets/VFs
+ * so that packets received by these Qsets can use primary VF's netdev
+ */
+static void nicvf_send_vf_struct(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR;
+ mbx.nicvf.sqs_mode = nic->sqs_mode;
+ mbx.nicvf.nicvf = (u64)nic;
+ nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_get_primary_vf_struct(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
+ nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+int nicvf_set_real_num_queues(struct net_device *netdev,
+ int tx_queues, int rx_queues)
+{
+ int err = 0;
+
+ err = netif_set_real_num_tx_queues(netdev, tx_queues);
+ if (err) {
+ netdev_err(netdev,
+ "Failed to set no of Tx queues: %d\n", tx_queues);
+ return err;
+ }
+
+ err = netif_set_real_num_rx_queues(netdev, rx_queues);
+ if (err)
+ netdev_err(netdev,
+ "Failed to set no of Rx queues: %d\n", rx_queues);
+ return err;
+}
+
+static int nicvf_init_resources(struct nicvf *nic)
+{
+ int err;
+
+ /* Enable Qset */
+ nicvf_qset_config(nic, true);
+
+ /* Initialize queues and HW for data transfer */
+ err = nicvf_config_data_transfer(nic, true);
+ if (err) {
+ netdev_err(nic->netdev,
+ "Failed to alloc/config VF's QSet resources\n");
+ return err;
+ }
+
+ return 0;
+}
+
+static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
+ struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
+ struct rcv_queue *rq, struct sk_buff **skb)
+{
+ struct xdp_buff xdp;
+ struct page *page;
+ u32 action;
+ u16 len, offset = 0;
+ u64 dma_addr, cpu_addr;
+ void *orig_data;
+
+ /* Retrieve packet buffer's DMA address and length */
+ len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64))));
+ dma_addr = *((u64 *)((void *)cqe_rx + (7 * sizeof(u64))));
+
+ cpu_addr = nicvf_iova_to_phys(nic, dma_addr);
+ if (!cpu_addr)
+ return false;
+ cpu_addr = (u64)phys_to_virt(cpu_addr);
+ page = virt_to_page((void *)cpu_addr);
+
+ xdp.data_hard_start = page_address(page);
+ xdp.data = (void *)cpu_addr;
+ xdp_set_data_meta_invalid(&xdp);
+ xdp.data_end = xdp.data + len;
+ xdp.rxq = &rq->xdp_rxq;
+ orig_data = xdp.data;
+
+ rcu_read_lock();
+ action = bpf_prog_run_xdp(prog, &xdp);
+ rcu_read_unlock();
+
+ len = xdp.data_end - xdp.data;
+ /* Check if XDP program has changed headers */
+ if (orig_data != xdp.data) {
+ offset = orig_data - xdp.data;
+ dma_addr -= offset;
+ }
+
+ switch (action) {
+ case XDP_PASS:
+ /* Check if it's a recycled page, if not
+ * unmap the DMA mapping.
+ *
+ * Recycled page holds an extra reference.
+ */
+ if (page_ref_count(page) == 1) {
+ dma_addr &= PAGE_MASK;
+ dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
+ RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
+ DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+
+ /* Build SKB and pass on packet to network stack */
+ *skb = build_skb(xdp.data,
+ RCV_FRAG_LEN - cqe_rx->align_pad + offset);
+ if (!*skb)
+ put_page(page);
+ else
+ skb_put(*skb, len);
+ return false;
+ case XDP_TX:
+ nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
+ return true;
+ default:
+ bpf_warn_invalid_xdp_action(action);
+ /* fall through */
+ case XDP_ABORTED:
+ trace_xdp_exception(nic->netdev, prog, action);
+ /* fall through */
+ case XDP_DROP:
+ /* Check if it's a recycled page, if not
+ * unmap the DMA mapping.
+ *
+ * Recycled page holds an extra reference.
+ */
+ if (page_ref_count(page) == 1) {
+ dma_addr &= PAGE_MASK;
+ dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
+ RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
+ DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+ put_page(page);
+ return true;
+ }
+ return false;
+}
+
+static void nicvf_snd_ptp_handler(struct net_device *netdev,
+ struct cqe_send_t *cqe_tx)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ struct skb_shared_hwtstamps ts;
+ u64 ns;
+
+ nic = nic->pnicvf;
+
+ /* Sync for 'ptp_skb' */
+ smp_rmb();
+
+ /* New timestamp request can be queued now */
+ atomic_set(&nic->tx_ptp_skbs, 0);
+
+ /* Check for timestamp requested skb */
+ if (!nic->ptp_skb)
+ return;
+
+ /* Check if timestamping is timedout, which is set to 10us */
+ if (cqe_tx->send_status == CQ_TX_ERROP_TSTMP_TIMEOUT ||
+ cqe_tx->send_status == CQ_TX_ERROP_TSTMP_CONFLICT)
+ goto no_tstamp;
+
+ /* Get the timestamp */
+ memset(&ts, 0, sizeof(ts));
+ ns = cavium_ptp_tstamp2time(nic->ptp_clock, cqe_tx->ptp_timestamp);
+ ts.hwtstamp = ns_to_ktime(ns);
+ skb_tstamp_tx(nic->ptp_skb, &ts);
+
+no_tstamp:
+ /* Free the original skb */
+ dev_kfree_skb_any(nic->ptp_skb);
+ nic->ptp_skb = NULL;
+ /* Sync 'ptp_skb' */
+ smp_wmb();
+}
+
+static void nicvf_snd_pkt_handler(struct net_device *netdev,
+ struct cqe_send_t *cqe_tx,
+ int budget, int *subdesc_cnt,
+ unsigned int *tx_pkts, unsigned int *tx_bytes)
+{
+ struct sk_buff *skb = NULL;
+ struct page *page;
+ struct nicvf *nic = netdev_priv(netdev);
+ struct snd_queue *sq;
+ struct sq_hdr_subdesc *hdr;
+ struct sq_hdr_subdesc *tso_sqe;
+
+ sq = &nic->qs->sq[cqe_tx->sq_idx];
+
+ hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
+ if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER)
+ return;
+
+ /* Check for errors */
+ if (cqe_tx->send_status)
+ nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx);
+
+ /* Is this a XDP designated Tx queue */
+ if (sq->is_xdp) {
+ page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr];
+ /* Check if it's recycled page or else unmap DMA mapping */
+ if (page && (page_ref_count(page) == 1))
+ nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
+ hdr->subdesc_cnt);
+
+ /* Release page reference for recycling */
+ if (page)
+ put_page(page);
+ sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL;
+ *subdesc_cnt += hdr->subdesc_cnt + 1;
+ return;
+ }
+
+ skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
+ if (skb) {
+ /* Check for dummy descriptor used for HW TSO offload on 88xx */
+ if (hdr->dont_send) {
+ /* Get actual TSO descriptors and free them */
+ tso_sqe =
+ (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+ nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+ tso_sqe->subdesc_cnt);
+ *subdesc_cnt += tso_sqe->subdesc_cnt + 1;
+ } else {
+ nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
+ hdr->subdesc_cnt);
+ }
+ *subdesc_cnt += hdr->subdesc_cnt + 1;
+ prefetch(skb);
+ (*tx_pkts)++;
+ *tx_bytes += skb->len;
+ /* If timestamp is requested for this skb, don't free it */
+ if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
+ !nic->pnicvf->ptp_skb)
+ nic->pnicvf->ptp_skb = skb;
+ else
+ napi_consume_skb(skb, budget);
+ sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
+ } else {
+ /* In case of SW TSO on 88xx, only last segment will have
+ * a SKB attached, so just free SQEs here.
+ */
+ if (!nic->hw_tso)
+ *subdesc_cnt += hdr->subdesc_cnt + 1;
+ }
+}
+
+static inline void nicvf_set_rxhash(struct net_device *netdev,
+ struct cqe_rx_t *cqe_rx,
+ struct sk_buff *skb)
+{
+ u8 hash_type;
+ u32 hash;
+
+ if (!(netdev->features & NETIF_F_RXHASH))
+ return;
+
+ switch (cqe_rx->rss_alg) {
+ case RSS_ALG_TCP_IP:
+ case RSS_ALG_UDP_IP:
+ hash_type = PKT_HASH_TYPE_L4;
+ hash = cqe_rx->rss_tag;
+ break;
+ case RSS_ALG_IP:
+ hash_type = PKT_HASH_TYPE_L3;
+ hash = cqe_rx->rss_tag;
+ break;
+ default:
+ hash_type = PKT_HASH_TYPE_NONE;
+ hash = 0;
+ }
+
+ skb_set_hash(skb, hash, hash_type);
+}
+
+static inline void nicvf_set_rxtstamp(struct nicvf *nic, struct sk_buff *skb)
+{
+ u64 ns;
+
+ if (!nic->ptp_clock || !nic->hw_rx_tstamp)
+ return;
+
+ /* The first 8 bytes is the timestamp */
+ ns = cavium_ptp_tstamp2time(nic->ptp_clock,
+ be64_to_cpu(*(__be64 *)skb->data));
+ skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns);
+
+ __skb_pull(skb, 8);
+}
+
+static void nicvf_rcv_pkt_handler(struct net_device *netdev,
+ struct napi_struct *napi,
+ struct cqe_rx_t *cqe_rx,
+ struct snd_queue *sq, struct rcv_queue *rq)
+{
+ struct sk_buff *skb = NULL;
+ struct nicvf *nic = netdev_priv(netdev);
+ struct nicvf *snic = nic;
+ int err = 0;
+ int rq_idx;
+
+ rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);
+
+ if (nic->sqs_mode) {
+ /* Use primary VF's 'nicvf' struct */
+ nic = nic->pnicvf;
+ netdev = nic->netdev;
+ }
+
+ /* Check for errors */
+ if (cqe_rx->err_level || cqe_rx->err_opcode) {
+ err = nicvf_check_cqe_rx_errs(nic, cqe_rx);
+ if (err && !cqe_rx->rb_cnt)
+ return;
+ }
+
+ /* For XDP, ignore pkts spanning multiple pages */
+ if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) {
+ /* Packet consumed by XDP */
+ if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb))
+ return;
+ } else {
+ skb = nicvf_get_rcv_skb(snic, cqe_rx,
+ nic->xdp_prog ? true : false);
+ }
+
+ if (!skb)
+ return;
+
+ if (netif_msg_pktdata(nic)) {
+ netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len);
+ print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
+ skb->data, skb->len, true);
+ }
+
+ /* If error packet, drop it here */
+ if (err) {
+ dev_kfree_skb_any(skb);
+ return;
+ }
+
+ nicvf_set_rxtstamp(nic, skb);
+ nicvf_set_rxhash(netdev, cqe_rx, skb);
+
+ skb_record_rx_queue(skb, rq_idx);
+ if (netdev->hw_features & NETIF_F_RXCSUM) {
+ /* HW by default verifies TCP/UDP/SCTP checksums */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ } else {
+ skb_checksum_none_assert(skb);
+ }
+
+ skb->protocol = eth_type_trans(skb, netdev);
+
+ /* Check for stripped VLAN */
+ if (cqe_rx->vlan_found && cqe_rx->vlan_stripped)
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ ntohs((__force __be16)cqe_rx->vlan_tci));
+
+ if (napi && (netdev->features & NETIF_F_GRO))
+ napi_gro_receive(napi, skb);
+ else
+ netif_receive_skb(skb);
+}
+
+static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
+ struct napi_struct *napi, int budget)
+{
+ int processed_cqe, work_done = 0, tx_done = 0;
+ int cqe_count, cqe_head;
+ int subdesc_cnt = 0;
+ struct nicvf *nic = netdev_priv(netdev);
+ struct queue_set *qs = nic->qs;
+ struct cmp_queue *cq = &qs->cq[cq_idx];
+ struct cqe_rx_t *cq_desc;
+ struct netdev_queue *txq;
+ struct snd_queue *sq = &qs->sq[cq_idx];
+ struct rcv_queue *rq = &qs->rq[cq_idx];
+ unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;
+
+ spin_lock_bh(&cq->lock);
+loop:
+ processed_cqe = 0;
+ /* Get no of valid CQ entries to process */
+ cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx);
+ cqe_count &= CQ_CQE_COUNT;
+ if (!cqe_count)
+ goto done;
+
+ /* Get head of the valid CQ entries */
+ cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
+ cqe_head &= 0xFFFF;
+
+ while (processed_cqe < cqe_count) {
+ /* Get the CQ descriptor */
+ cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
+ cqe_head++;
+ cqe_head &= (cq->dmem.q_len - 1);
+ /* Initiate prefetch for next descriptor */
+ prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head));
+
+ if ((work_done >= budget) && napi &&
+ (cq_desc->cqe_type != CQE_TYPE_SEND)) {
+ break;
+ }
+
+ switch (cq_desc->cqe_type) {
+ case CQE_TYPE_RX:
+ nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq);
+ work_done++;
+ break;
+ case CQE_TYPE_SEND:
+ nicvf_snd_pkt_handler(netdev, (void *)cq_desc,
+ budget, &subdesc_cnt,
+ &tx_pkts, &tx_bytes);
+ tx_done++;
+ break;
+ case CQE_TYPE_SEND_PTP:
+ nicvf_snd_ptp_handler(netdev, (void *)cq_desc);
+ break;
+ case CQE_TYPE_INVALID:
+ case CQE_TYPE_RX_SPLIT:
+ case CQE_TYPE_RX_TCP:
+ /* Ignore for now */
+ break;
+ }
+ processed_cqe++;
+ }
+
+ /* Ring doorbell to inform H/W to reuse processed CQEs */
+ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR,
+ cq_idx, processed_cqe);
+
+ if ((work_done < budget) && napi)
+ goto loop;
+
+done:
+ /* Update SQ's descriptor free count */
+ if (subdesc_cnt)
+ nicvf_put_sq_desc(sq, subdesc_cnt);
+
+ txq_idx = nicvf_netdev_qidx(nic, cq_idx);
+ /* Handle XDP TX queues */
+ if (nic->pnicvf->xdp_prog) {
+ if (txq_idx < nic->pnicvf->xdp_tx_queues) {
+ nicvf_xdp_sq_doorbell(nic, sq, cq_idx);
+ goto out;
+ }
+ nic = nic->pnicvf;
+ txq_idx -= nic->pnicvf->xdp_tx_queues;
+ }
+
+ /* Wakeup TXQ if its stopped earlier due to SQ full */
+ if (tx_done ||
+ (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) {
+ netdev = nic->pnicvf->netdev;
+ txq = netdev_get_tx_queue(netdev, txq_idx);
+ if (tx_pkts)
+ netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
+
+ /* To read updated queue and carrier status */
+ smp_mb();
+ if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
+ netif_tx_wake_queue(txq);
+ nic = nic->pnicvf;
+ this_cpu_inc(nic->drv_stats->txq_wake);
+ netif_warn(nic, tx_err, netdev,
+ "Transmit queue wakeup SQ%d\n", txq_idx);
+ }
+ }
+
+out:
+ spin_unlock_bh(&cq->lock);
+ return work_done;
+}
+
+static int nicvf_poll(struct napi_struct *napi, int budget)
+{
+ u64 cq_head;
+ int work_done = 0;
+ struct net_device *netdev = napi->dev;
+ struct nicvf *nic = netdev_priv(netdev);
+ struct nicvf_cq_poll *cq;
+
+ cq = container_of(napi, struct nicvf_cq_poll, napi);
+ work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget);
+
+ if (work_done < budget) {
+ /* Slow packet rate, exit polling */
+ napi_complete_done(napi, work_done);
+ /* Re-enable interrupts */
+ cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD,
+ cq->cq_idx);
+ nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
+ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD,
+ cq->cq_idx, cq_head);
+ nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
+ }
+ return work_done;
+}
+
+/* Qset error interrupt handler
+ *
+ * As of now only CQ errors are handled
+ */
+static void nicvf_handle_qs_err(unsigned long data)
+{
+ struct nicvf *nic = (struct nicvf *)data;
+ struct queue_set *qs = nic->qs;
+ int qidx;
+ u64 status;
+
+ netif_tx_disable(nic->netdev);
+
+ /* Check if it is CQ err */
+ for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
+ status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS,
+ qidx);
+ if (!(status & CQ_ERR_MASK))
+ continue;
+ /* Process already queued CQEs and reconfig CQ */
+ nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
+ nicvf_sq_disable(nic, qidx);
+ nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 0);
+ nicvf_cmp_queue_config(nic, qs, qidx, true);
+ nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx);
+ nicvf_sq_enable(nic, &qs->sq[qidx], qidx);
+
+ nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
+ }
+
+ netif_tx_start_all_queues(nic->netdev);
+ /* Re-enable Qset error interrupt */
+ nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
+}
+
+static void nicvf_dump_intr_status(struct nicvf *nic)
+{
+ netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n",
+ nicvf_reg_read(nic, NIC_VF_INT));
+}
+
+static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
+{
+ struct nicvf *nic = (struct nicvf *)nicvf_irq;
+ u64 intr;
+
+ nicvf_dump_intr_status(nic);
+
+ intr = nicvf_reg_read(nic, NIC_VF_INT);
+ /* Check for spurious interrupt */
+ if (!(intr & NICVF_INTR_MBOX_MASK))
+ return IRQ_HANDLED;
+
+ nicvf_handle_mbx_intr(nic);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq)
+{
+ struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq;
+ struct nicvf *nic = cq_poll->nicvf;
+ int qidx = cq_poll->cq_idx;
+
+ nicvf_dump_intr_status(nic);
+
+ /* Disable interrupts */
+ nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
+
+ /* Schedule NAPI */
+ napi_schedule_irqoff(&cq_poll->napi);
+
+ /* Clear interrupt */
+ nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq)
+{
+ struct nicvf *nic = (struct nicvf *)nicvf_irq;
+ u8 qidx;
+
+
+ nicvf_dump_intr_status(nic);
+
+ /* Disable RBDR interrupt and schedule softirq */
+ for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) {
+ if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
+ continue;
+ nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
+ tasklet_hi_schedule(&nic->rbdr_task);
+ /* Clear interrupt */
+ nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq)
+{
+ struct nicvf *nic = (struct nicvf *)nicvf_irq;
+
+ nicvf_dump_intr_status(nic);
+
+ /* Disable Qset err interrupt and schedule softirq */
+ nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
+ tasklet_hi_schedule(&nic->qs_err_task);
+ nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);
+
+ return IRQ_HANDLED;
+}
+
+static void nicvf_set_irq_affinity(struct nicvf *nic)
+{
+ int vec, cpu;
+
+ for (vec = 0; vec < nic->num_vec; vec++) {
+ if (!nic->irq_allocated[vec])
+ continue;
+
+ if (!zalloc_cpumask_var(&nic->affinity_mask[vec], GFP_KERNEL))
+ return;
+ /* CQ interrupts */
+ if (vec < NICVF_INTR_ID_SQ)
+ /* Leave CPU0 for RBDR and other interrupts */
+ cpu = nicvf_netdev_qidx(nic, vec) + 1;
+ else
+ cpu = 0;
+
+ cpumask_set_cpu(cpumask_local_spread(cpu, nic->node),
+ nic->affinity_mask[vec]);
+ irq_set_affinity_hint(pci_irq_vector(nic->pdev, vec),
+ nic->affinity_mask[vec]);
+ }
+}
+
+static int nicvf_register_interrupts(struct nicvf *nic)
+{
+ int irq, ret = 0;
+
+ for_each_cq_irq(irq)
+ sprintf(nic->irq_name[irq], "%s-rxtx-%d",
+ nic->pnicvf->netdev->name,
+ nicvf_netdev_qidx(nic, irq));
+
+ for_each_sq_irq(irq)
+ sprintf(nic->irq_name[irq], "%s-sq-%d",
+ nic->pnicvf->netdev->name,
+ nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ));
+
+ for_each_rbdr_irq(irq)
+ sprintf(nic->irq_name[irq], "%s-rbdr-%d",
+ nic->pnicvf->netdev->name,
+ nic->sqs_mode ? (nic->sqs_id + 1) : 0);
+
+ /* Register CQ interrupts */
+ for (irq = 0; irq < nic->qs->cq_cnt; irq++) {
+ ret = request_irq(pci_irq_vector(nic->pdev, irq),
+ nicvf_intr_handler,
+ 0, nic->irq_name[irq], nic->napi[irq]);
+ if (ret)
+ goto err;
+ nic->irq_allocated[irq] = true;
+ }
+
+ /* Register RBDR interrupt */
+ for (irq = NICVF_INTR_ID_RBDR;
+ irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) {
+ ret = request_irq(pci_irq_vector(nic->pdev, irq),
+ nicvf_rbdr_intr_handler,
+ 0, nic->irq_name[irq], nic);
+ if (ret)
+ goto err;
+ nic->irq_allocated[irq] = true;
+ }
+
+ /* Register QS error interrupt */
+ sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d",
+ nic->pnicvf->netdev->name,
+ nic->sqs_mode ? (nic->sqs_id + 1) : 0);
+ irq = NICVF_INTR_ID_QS_ERR;
+ ret = request_irq(pci_irq_vector(nic->pdev, irq),
+ nicvf_qs_err_intr_handler,
+ 0, nic->irq_name[irq], nic);
+ if (ret)
+ goto err;
+
+ nic->irq_allocated[irq] = true;
+
+ /* Set IRQ affinities */
+ nicvf_set_irq_affinity(nic);
+
+err:
+ if (ret)
+ netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq);
+
+ return ret;
+}
+
+static void nicvf_unregister_interrupts(struct nicvf *nic)
+{
+ struct pci_dev *pdev = nic->pdev;
+ int irq;
+
+ /* Free registered interrupts */
+ for (irq = 0; irq < nic->num_vec; irq++) {
+ if (!nic->irq_allocated[irq])
+ continue;
+
+ irq_set_affinity_hint(pci_irq_vector(pdev, irq), NULL);
+ free_cpumask_var(nic->affinity_mask[irq]);
+
+ if (irq < NICVF_INTR_ID_SQ)
+ free_irq(pci_irq_vector(pdev, irq), nic->napi[irq]);
+ else
+ free_irq(pci_irq_vector(pdev, irq), nic);
+
+ nic->irq_allocated[irq] = false;
+ }
+
+ /* Disable MSI-X */
+ pci_free_irq_vectors(pdev);
+ nic->num_vec = 0;
+}
+
+/* Initialize MSIX vectors and register MISC interrupt.
+ * Send READY message to PF to check if its alive
+ */
+static int nicvf_register_misc_interrupt(struct nicvf *nic)
+{
+ int ret = 0;
+ int irq = NICVF_INTR_ID_MISC;
+
+ /* Return if mailbox interrupt is already registered */
+ if (nic->pdev->msix_enabled)
+ return 0;
+
+ /* Enable MSI-X */
+ nic->num_vec = pci_msix_vec_count(nic->pdev);
+ ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec,
+ PCI_IRQ_MSIX);
+ if (ret < 0) {
+ netdev_err(nic->netdev,
+ "Req for #%d msix vectors failed\n", nic->num_vec);
+ return ret;
+ }
+
+ sprintf(nic->irq_name[irq], "%s Mbox", "NICVF");
+ /* Register Misc interrupt */
+ ret = request_irq(pci_irq_vector(nic->pdev, irq),
+ nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic);
+
+ if (ret)
+ return ret;
+ nic->irq_allocated[irq] = true;
+
+ /* Enable mailbox interrupt */
+ nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0);
+
+ /* Check if VF is able to communicate with PF */
+ if (!nicvf_check_pf_ready(nic)) {
+ nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
+ nicvf_unregister_interrupts(nic);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ int qid = skb_get_queue_mapping(skb);
+ struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid);
+ struct nicvf *snic;
+ struct snd_queue *sq;
+ int tmp;
+
+ /* Check for minimum packet length */
+ if (skb->len <= ETH_HLEN) {
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ /* In XDP case, initial HW tx queues are used for XDP,
+ * but stack's queue mapping starts at '0', so skip the
+ * Tx queues attached to Rx queues for XDP.
+ */
+ if (nic->xdp_prog)
+ qid += nic->xdp_tx_queues;
+
+ snic = nic;
+ /* Get secondary Qset's SQ structure */
+ if (qid >= MAX_SND_QUEUES_PER_QS) {
+ tmp = qid / MAX_SND_QUEUES_PER_QS;
+ snic = (struct nicvf *)nic->snicvf[tmp - 1];
+ if (!snic) {
+ netdev_warn(nic->netdev,
+ "Secondary Qset#%d's ptr not initialized\n",
+ tmp - 1);
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+ qid = qid % MAX_SND_QUEUES_PER_QS;
+ }
+
+ sq = &snic->qs->sq[qid];
+ if (!netif_tx_queue_stopped(txq) &&
+ !nicvf_sq_append_skb(snic, sq, skb, qid)) {
+ netif_tx_stop_queue(txq);
+
+ /* Barrier, so that stop_queue visible to other cpus */
+ smp_mb();
+
+ /* Check again, incase another cpu freed descriptors */
+ if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) {
+ netif_tx_wake_queue(txq);
+ } else {
+ this_cpu_inc(nic->drv_stats->txq_stop);
+ netif_warn(nic, tx_err, netdev,
+ "Transmit ring full, stopping SQ%d\n", qid);
+ }
+ return NETDEV_TX_BUSY;
+ }
+
+ return NETDEV_TX_OK;
+}
+
+static inline void nicvf_free_cq_poll(struct nicvf *nic)
+{
+ struct nicvf_cq_poll *cq_poll;
+ int qidx;
+
+ for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
+ cq_poll = nic->napi[qidx];
+ if (!cq_poll)
+ continue;
+ nic->napi[qidx] = NULL;
+ kfree(cq_poll);
+ }
+}
+
+int nicvf_stop(struct net_device *netdev)
+{
+ int irq, qidx;
+ struct nicvf *nic = netdev_priv(netdev);
+ struct queue_set *qs = nic->qs;
+ struct nicvf_cq_poll *cq_poll = NULL;
+ union nic_mbx mbx = {};
+
+ mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(nic->netdev);
+ nic->link_up = false;
+
+ /* Teardown secondary qsets first */
+ if (!nic->sqs_mode) {
+ for (qidx = 0; qidx < nic->sqs_count; qidx++) {
+ if (!nic->snicvf[qidx])
+ continue;
+ nicvf_stop(nic->snicvf[qidx]->netdev);
+ nic->snicvf[qidx] = NULL;
+ }
+ }
+
+ /* Disable RBDR & QS error interrupts */
+ for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
+ nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
+ nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
+ }
+ nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
+ nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);
+
+ /* Wait for pending IRQ handlers to finish */
+ for (irq = 0; irq < nic->num_vec; irq++)
+ synchronize_irq(pci_irq_vector(nic->pdev, irq));
+
+ tasklet_kill(&nic->rbdr_task);
+ tasklet_kill(&nic->qs_err_task);
+ if (nic->rb_work_scheduled)
+ cancel_delayed_work_sync(&nic->rbdr_work);
+
+ for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
+ cq_poll = nic->napi[qidx];
+ if (!cq_poll)
+ continue;
+ napi_synchronize(&cq_poll->napi);
+ /* CQ intr is enabled while napi_complete,
+ * so disable it now
+ */
+ nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
+ nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
+ napi_disable(&cq_poll->napi);
+ netif_napi_del(&cq_poll->napi);
+ }
+
+ netif_tx_disable(netdev);
+
+ for (qidx = 0; qidx < netdev->num_tx_queues; qidx++)
+ netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx));
+
+ /* Free resources */
+ nicvf_config_data_transfer(nic, false);
+
+ /* Disable HW Qset */
+ nicvf_qset_config(nic, false);
+
+ /* disable mailbox interrupt */
+ nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
+
+ nicvf_unregister_interrupts(nic);
+
+ nicvf_free_cq_poll(nic);
+
+ /* Free any pending SKB saved to receive timestamp */
+ if (nic->ptp_skb) {
+ dev_kfree_skb_any(nic->ptp_skb);
+ nic->ptp_skb = NULL;
+ }
+
+ /* Clear multiqset info */
+ nic->pnicvf = nic;
+
+ return 0;
+}
+
+static int nicvf_config_hw_rx_tstamp(struct nicvf *nic, bool enable)
+{
+ union nic_mbx mbx = {};
+
+ mbx.ptp.msg = NIC_MBOX_MSG_PTP_CFG;
+ mbx.ptp.enable = enable;
+
+ return nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
+{
+ union nic_mbx mbx = {};
+
+ mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
+ mbx.frs.max_frs = mtu;
+ mbx.frs.vf_id = nic->vf_id;
+
+ return nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+int nicvf_open(struct net_device *netdev)
+{
+ int cpu, err, qidx;
+ struct nicvf *nic = netdev_priv(netdev);
+ struct queue_set *qs = nic->qs;
+ struct nicvf_cq_poll *cq_poll = NULL;
+
+ netif_carrier_off(netdev);
+
+ err = nicvf_register_misc_interrupt(nic);
+ if (err)
+ return err;
+
+ /* Register NAPI handler for processing CQEs */
+ for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
+ cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL);
+ if (!cq_poll) {
+ err = -ENOMEM;
+ goto napi_del;
+ }
+ cq_poll->cq_idx = qidx;
+ cq_poll->nicvf = nic;
+ netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
+ NAPI_POLL_WEIGHT);
+ napi_enable(&cq_poll->napi);
+ nic->napi[qidx] = cq_poll;
+ }
+
+ /* Check if we got MAC address from PF or else generate a radom MAC */
+ if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) {
+ eth_hw_addr_random(netdev);
+ nicvf_hw_set_mac_addr(nic, netdev);
+ }
+
+ if (nic->set_mac_pending) {
+ nic->set_mac_pending = false;
+ nicvf_hw_set_mac_addr(nic, netdev);
+ }
+
+ /* Init tasklet for handling Qset err interrupt */
+ tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err,
+ (unsigned long)nic);
+
+ /* Init RBDR tasklet which will refill RBDR */
+ tasklet_init(&nic->rbdr_task, nicvf_rbdr_task,
+ (unsigned long)nic);
+ INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work);
+
+ /* Configure CPI alorithm */
+ nic->cpi_alg = cpi_alg;
+ if (!nic->sqs_mode)
+ nicvf_config_cpi(nic);
+
+ nicvf_request_sqs(nic);
+ if (nic->sqs_mode)
+ nicvf_get_primary_vf_struct(nic);
+
+ /* Configure PTP timestamp */
+ if (nic->ptp_clock)
+ nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp);
+ atomic_set(&nic->tx_ptp_skbs, 0);
+ nic->ptp_skb = NULL;
+
+ /* Configure receive side scaling and MTU */
+ if (!nic->sqs_mode) {
+ nicvf_rss_init(nic);
+ err = nicvf_update_hw_max_frs(nic, netdev->mtu);
+ if (err)
+ goto cleanup;
+
+ /* Clear percpu stats */
+ for_each_possible_cpu(cpu)
+ memset(per_cpu_ptr(nic->drv_stats, cpu), 0,
+ sizeof(struct nicvf_drv_stats));
+ }
+
+ err = nicvf_register_interrupts(nic);
+ if (err)
+ goto cleanup;
+
+ /* Initialize the queues */
+ err = nicvf_init_resources(nic);
+ if (err)
+ goto cleanup;
+
+ /* Make sure queue initialization is written */
+ wmb();
+
+ nicvf_reg_write(nic, NIC_VF_INT, -1);
+ /* Enable Qset err interrupt */
+ nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
+
+ /* Enable completion queue interrupt */
+ for (qidx = 0; qidx < qs->cq_cnt; qidx++)
+ nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
+
+ /* Enable RBDR threshold interrupt */
+ for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
+ nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
+
+ /* Send VF config done msg to PF */
+ nicvf_send_cfg_done(nic);
+
+ return 0;
+cleanup:
+ nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
+ nicvf_unregister_interrupts(nic);
+ tasklet_kill(&nic->qs_err_task);
+ tasklet_kill(&nic->rbdr_task);
+napi_del:
+ for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
+ cq_poll = nic->napi[qidx];
+ if (!cq_poll)
+ continue;
+ napi_disable(&cq_poll->napi);
+ netif_napi_del(&cq_poll->napi);
+ }
+ nicvf_free_cq_poll(nic);
+ return err;
+}
+
+static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ int orig_mtu = netdev->mtu;
+
+ /* For now just support only the usual MTU sized frames,
+ * plus some headroom for VLAN, QinQ.
+ */
+ if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) {
+ netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
+ netdev->mtu);
+ return -EINVAL;
+ }
+
+ netdev->mtu = new_mtu;
+
+ if (!netif_running(netdev))
+ return 0;
+
+ if (nicvf_update_hw_max_frs(nic, new_mtu)) {
+ netdev->mtu = orig_mtu;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nicvf_set_mac_address(struct net_device *netdev, void *p)
+{
+ struct sockaddr *addr = p;
+ struct nicvf *nic = netdev_priv(netdev);
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+
+ if (nic->pdev->msix_enabled) {
+ if (nicvf_hw_set_mac_addr(nic, netdev))
+ return -EBUSY;
+ } else {
+ nic->set_mac_pending = true;
+ }
+
+ return 0;
+}
+
+void nicvf_update_lmac_stats(struct nicvf *nic)
+{
+ int stat = 0;
+ union nic_mbx mbx = {};
+
+ if (!netif_running(nic->netdev))
+ return;
+
+ mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
+ mbx.bgx_stats.vf_id = nic->vf_id;
+ /* Rx stats */
+ mbx.bgx_stats.rx = 1;
+ while (stat < BGX_RX_STATS_COUNT) {
+ mbx.bgx_stats.idx = stat;
+ if (nicvf_send_msg_to_pf(nic, &mbx))
+ return;
+ stat++;
+ }
+
+ stat = 0;
+
+ /* Tx stats */
+ mbx.bgx_stats.rx = 0;
+ while (stat < BGX_TX_STATS_COUNT) {
+ mbx.bgx_stats.idx = stat;
+ if (nicvf_send_msg_to_pf(nic, &mbx))
+ return;
+ stat++;
+ }
+}
+
+void nicvf_update_stats(struct nicvf *nic)
+{
+ int qidx, cpu;
+ u64 tmp_stats = 0;
+ struct nicvf_hw_stats *stats = &nic->hw_stats;
+ struct nicvf_drv_stats *drv_stats;
+ struct queue_set *qs = nic->qs;
+
+#define GET_RX_STATS(reg) \
+ nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3))
+#define GET_TX_STATS(reg) \
+ nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3))
+
+ stats->rx_bytes = GET_RX_STATS(RX_OCTS);
+ stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST);
+ stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST);
+ stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST);
+ stats->rx_fcs_errors = GET_RX_STATS(RX_FCS);
+ stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR);
+ stats->rx_drop_red = GET_RX_STATS(RX_RED);
+ stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS);
+ stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN);
+ stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS);
+ stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST);
+ stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST);
+ stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
+ stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);
+
+ stats->tx_bytes = GET_TX_STATS(TX_OCTS);
+ stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST);
+ stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST);
+ stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST);
+ stats->tx_drops = GET_TX_STATS(TX_DROP);
+
+ /* On T88 pass 2.0, the dummy SQE added for TSO notification
+ * via CQE has 'dont_send' set. Hence HW drops the pkt pointed
+ * pointed by dummy SQE and results in tx_drops counter being
+ * incremented. Subtracting it from tx_tso counter will give
+ * exact tx_drops counter.
+ */
+ if (nic->t88 && nic->hw_tso) {
+ for_each_possible_cpu(cpu) {
+ drv_stats = per_cpu_ptr(nic->drv_stats, cpu);
+ tmp_stats += drv_stats->tx_tso;
+ }
+ stats->tx_drops = tmp_stats - stats->tx_drops;
+ }
+ stats->tx_frames = stats->tx_ucast_frames +
+ stats->tx_bcast_frames +
+ stats->tx_mcast_frames;
+ stats->rx_frames = stats->rx_ucast_frames +
+ stats->rx_bcast_frames +
+ stats->rx_mcast_frames;
+ stats->rx_drops = stats->rx_drop_red +
+ stats->rx_drop_overrun;
+
+ /* Update RQ and SQ stats */
+ for (qidx = 0; qidx < qs->rq_cnt; qidx++)
+ nicvf_update_rq_stats(nic, qidx);
+ for (qidx = 0; qidx < qs->sq_cnt; qidx++)
+ nicvf_update_sq_stats(nic, qidx);
+}
+
+static void nicvf_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ struct nicvf_hw_stats *hw_stats = &nic->hw_stats;
+
+ nicvf_update_stats(nic);
+
+ stats->rx_bytes = hw_stats->rx_bytes;
+ stats->rx_packets = hw_stats->rx_frames;
+ stats->rx_dropped = hw_stats->rx_drops;
+ stats->multicast = hw_stats->rx_mcast_frames;
+
+ stats->tx_bytes = hw_stats->tx_bytes;
+ stats->tx_packets = hw_stats->tx_frames;
+ stats->tx_dropped = hw_stats->tx_drops;
+
+}
+
+static void nicvf_tx_timeout(struct net_device *dev)
+{
+ struct nicvf *nic = netdev_priv(dev);
+
+ netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n");
+
+ this_cpu_inc(nic->drv_stats->tx_timeout);
+ schedule_work(&nic->reset_task);
+}
+
+static void nicvf_reset_task(struct work_struct *work)
+{
+ struct nicvf *nic;
+
+ nic = container_of(work, struct nicvf, reset_task);
+
+ if (!netif_running(nic->netdev))
+ return;
+
+ nicvf_stop(nic->netdev);
+ nicvf_open(nic->netdev);
+ netif_trans_update(nic->netdev);
+}
+
+static int nicvf_config_loopback(struct nicvf *nic,
+ netdev_features_t features)
+{
+ union nic_mbx mbx = {};
+
+ mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK;
+ mbx.lbk.vf_id = nic->vf_id;
+ mbx.lbk.enable = (features & NETIF_F_LOOPBACK) != 0;
+
+ return nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static netdev_features_t nicvf_fix_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ if ((features & NETIF_F_LOOPBACK) &&
+ netif_running(netdev) && !nic->loopback_supported)
+ features &= ~NETIF_F_LOOPBACK;
+
+ return features;
+}
+
+static int nicvf_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ netdev_features_t changed = features ^ netdev->features;
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+ nicvf_config_vlan_stripping(nic, features);
+
+ if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev))
+ return nicvf_config_loopback(nic, features);
+
+ return 0;
+}
+
+static void nicvf_set_xdp_queues(struct nicvf *nic, bool bpf_attached)
+{
+ u8 cq_count, txq_count;
+
+ /* Set XDP Tx queue count same as Rx queue count */
+ if (!bpf_attached)
+ nic->xdp_tx_queues = 0;
+ else
+ nic->xdp_tx_queues = nic->rx_queues;
+
+ /* If queue count > MAX_CMP_QUEUES_PER_QS, then additional qsets
+ * needs to be allocated, check how many.
+ */
+ txq_count = nic->xdp_tx_queues + nic->tx_queues;
+ cq_count = max(nic->rx_queues, txq_count);
+ if (cq_count > MAX_CMP_QUEUES_PER_QS) {
+ nic->sqs_count = roundup(cq_count, MAX_CMP_QUEUES_PER_QS);
+ nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1;
+ } else {
+ nic->sqs_count = 0;
+ }
+
+ /* Set primary Qset's resources */
+ nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS);
+ nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS);
+ nic->qs->cq_cnt = max_t(u8, nic->qs->rq_cnt, nic->qs->sq_cnt);
+
+ /* Update stack */
+ nicvf_set_real_num_queues(nic->netdev, nic->tx_queues, nic->rx_queues);
+}
+
+static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
+{
+ struct net_device *dev = nic->netdev;
+ bool if_up = netif_running(nic->netdev);
+ struct bpf_prog *old_prog;
+ bool bpf_attached = false;
+ int ret = 0;
+
+ /* For now just support only the usual MTU sized frames,
+ * plus some headroom for VLAN, QinQ.
+ */
+ if (prog && dev->mtu > MAX_XDP_MTU) {
+ netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
+ dev->mtu);
+ return -EOPNOTSUPP;
+ }
+
+ /* ALL SQs attached to CQs i.e same as RQs, are treated as
+ * XDP Tx queues and more Tx queues are allocated for
+ * network stack to send pkts out.
+ *
+ * No of Tx queues are either same as Rx queues or whatever
+ * is left in max no of queues possible.
+ */
+ if ((nic->rx_queues + nic->tx_queues) > nic->max_queues) {
+ netdev_warn(dev,
+ "Failed to attach BPF prog, RXQs + TXQs > Max %d\n",
+ nic->max_queues);
+ return -ENOMEM;
+ }
+
+ if (if_up)
+ nicvf_stop(nic->netdev);
+
+ old_prog = xchg(&nic->xdp_prog, prog);
+ /* Detach old prog, if any */
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (nic->xdp_prog) {
+ /* Attach BPF program */
+ nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
+ if (!IS_ERR(nic->xdp_prog)) {
+ bpf_attached = true;
+ } else {
+ ret = PTR_ERR(nic->xdp_prog);
+ nic->xdp_prog = NULL;
+ }
+ }
+
+ /* Calculate Tx queues needed for XDP and network stack */
+ nicvf_set_xdp_queues(nic, bpf_attached);
+
+ if (if_up) {
+ /* Reinitialize interface, clean slate */
+ nicvf_open(nic->netdev);
+ netif_trans_update(nic->netdev);
+ }
+
+ return ret;
+}
+
+static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ /* To avoid checks while retrieving buffer address from CQE_RX,
+ * do not support XDP for T88 pass1.x silicons which are anyway
+ * not in use widely.
+ */
+ if (pass1_silicon(nic->pdev))
+ return -EOPNOTSUPP;
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return nicvf_xdp_setup(nic, xdp->prog);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ struct nicvf *nic = netdev_priv(netdev);
+
+ if (!nic->ptp_clock)
+ return -ENODEV;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ /* reserved for future extensions */
+ if (config.flags)
+ return -EINVAL;
+
+ switch (config.tx_type) {
+ case HWTSTAMP_TX_OFF:
+ case HWTSTAMP_TX_ON:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ switch (config.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ nic->hw_rx_tstamp = false;
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ nic->hw_rx_tstamp = true;
+ config.rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ if (netif_running(netdev))
+ nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp);
+
+ if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int nicvf_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
+{
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return nicvf_config_hwtstamp(netdev, req);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
+ struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+ int idx;
+
+ /* From the inside of VM code flow we have only 128 bits memory
+ * available to send message to host's PF, so send all mc addrs
+ * one by one, starting from flush command in case if kernel
+ * requests to configure specific MAC filtering
+ */
+
+ /* flush DMAC filters and reset RX mode */
+ mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
+
+ if (mode & BGX_XCAST_MCAST_FILTER) {
+ /* once enabling filtering, we need to signal to PF to add
+ * its' own LMAC to the filter to accept packets for it.
+ */
+ mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
+ mbx.xcast.data.mac = 0;
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
+ }
+
+ /* check if we have any specific MACs to be added to PF DMAC filter */
+ if (mc_addrs) {
+ /* now go through kernel list of MACs and add them one by one */
+ for (idx = 0; idx < mc_addrs->count; idx++) {
+ mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
+ mbx.xcast.data.mac = mc_addrs->mc[idx];
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
+ }
+ }
+
+ /* and finally set rx mode for PF accordingly */
+ mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
+ mbx.xcast.data.mode = mode;
+
+ nicvf_send_msg_to_pf(nic, &mbx);
+free_mc:
+ kfree(mc_addrs);
+}
+
+static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
+{
+ struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
+ work.work);
+ struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
+ u8 mode;
+ struct xcast_addr_list *mc;
+
+ if (!vf_work)
+ return;
+
+ /* Save message data locally to prevent them from
+ * being overwritten by next ndo_set_rx_mode call().
+ */
+ spin_lock_bh(&nic->rx_mode_wq_lock);
+ mode = vf_work->mode;
+ mc = vf_work->mc;
+ vf_work->mc = NULL;
+ spin_unlock_bh(&nic->rx_mode_wq_lock);
+
+ __nicvf_set_rx_mode_task(mode, mc, nic);
+}
+
+static void nicvf_set_rx_mode(struct net_device *netdev)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ struct netdev_hw_addr *ha;
+ struct xcast_addr_list *mc_list = NULL;
+ u8 mode = 0;
+
+ if (netdev->flags & IFF_PROMISC) {
+ mode = BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT;
+ } else {
+ if (netdev->flags & IFF_BROADCAST)
+ mode |= BGX_XCAST_BCAST_ACCEPT;
+
+ if (netdev->flags & IFF_ALLMULTI) {
+ mode |= BGX_XCAST_MCAST_ACCEPT;
+ } else if (netdev->flags & IFF_MULTICAST) {
+ mode |= BGX_XCAST_MCAST_FILTER;
+ /* here we need to copy mc addrs */
+ if (netdev_mc_count(netdev)) {
+ mc_list = kmalloc(offsetof(typeof(*mc_list),
+ mc[netdev_mc_count(netdev)]),
+ GFP_ATOMIC);
+ if (unlikely(!mc_list))
+ return;
+ mc_list->count = 0;
+ netdev_hw_addr_list_for_each(ha, &netdev->mc) {
+ mc_list->mc[mc_list->count] =
+ ether_addr_to_u64(ha->addr);
+ mc_list->count++;
+ }
+ }
+ }
+ }
+ spin_lock(&nic->rx_mode_wq_lock);
+ kfree(nic->rx_mode_work.mc);
+ nic->rx_mode_work.mc = mc_list;
+ nic->rx_mode_work.mode = mode;
+ queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 0);
+ spin_unlock(&nic->rx_mode_wq_lock);
+}
+
+static const struct net_device_ops nicvf_netdev_ops = {
+ .ndo_open = nicvf_open,
+ .ndo_stop = nicvf_stop,
+ .ndo_start_xmit = nicvf_xmit,
+ .ndo_change_mtu = nicvf_change_mtu,
+ .ndo_set_mac_address = nicvf_set_mac_address,
+ .ndo_get_stats64 = nicvf_get_stats64,
+ .ndo_tx_timeout = nicvf_tx_timeout,
+ .ndo_fix_features = nicvf_fix_features,
+ .ndo_set_features = nicvf_set_features,
+ .ndo_bpf = nicvf_xdp,
+ .ndo_do_ioctl = nicvf_ioctl,
+ .ndo_set_rx_mode = nicvf_set_rx_mode,
+};
+
+static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct device *dev = &pdev->dev;
+ struct net_device *netdev;
+ struct nicvf *nic;
+ int err, qcount;
+ u16 sdevid;
+ struct cavium_ptp *ptp_clock;
+
+ ptp_clock = cavium_ptp_get();
+ if (IS_ERR(ptp_clock)) {
+ if (PTR_ERR(ptp_clock) == -ENODEV)
+ /* In virtualized environment we proceed without ptp */
+ ptp_clock = NULL;
+ else
+ return PTR_ERR(ptp_clock);
+ }
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(dev, "Failed to enable PCI device\n");
+ return err;
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ dev_err(dev, "PCI request regions failed 0x%x\n", err);
+ goto err_disable_device;
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+ if (err) {
+ dev_err(dev, "Unable to get usable DMA configuration\n");
+ goto err_release_regions;
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
+ if (err) {
+ dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n");
+ goto err_release_regions;
+ }
+
+ qcount = netif_get_num_default_rss_queues();
+
+ /* Restrict multiqset support only for host bound VFs */
+ if (pdev->is_virtfn) {
+ /* Set max number of queues per VF */
+ qcount = min_t(int, num_online_cpus(),
+ (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
+ }
+
+ netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
+ if (!netdev) {
+ err = -ENOMEM;
+ goto err_release_regions;
+ }
+
+ pci_set_drvdata(pdev, netdev);
+
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+
+ nic = netdev_priv(netdev);
+ nic->netdev = netdev;
+ nic->pdev = pdev;
+ nic->pnicvf = nic;
+ nic->max_queues = qcount;
+ /* If no of CPUs are too low, there won't be any queues left
+ * for XDP_TX, hence double it.
+ */
+ if (!nic->t88)
+ nic->max_queues *= 2;
+ nic->ptp_clock = ptp_clock;
+
+ /* MAP VF's configuration registers */
+ nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+ if (!nic->reg_base) {
+ dev_err(dev, "Cannot map config register space, aborting\n");
+ err = -ENOMEM;
+ goto err_free_netdev;
+ }
+
+ nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats);
+ if (!nic->drv_stats) {
+ err = -ENOMEM;
+ goto err_free_netdev;
+ }
+
+ err = nicvf_set_qset_resources(nic);
+ if (err)
+ goto err_free_netdev;
+
+ /* Check if PF is alive and get MAC address for this VF */
+ err = nicvf_register_misc_interrupt(nic);
+ if (err)
+ goto err_free_netdev;
+
+ nicvf_send_vf_struct(nic);
+
+ if (!pass1_silicon(nic->pdev))
+ nic->hw_tso = true;
+
+ /* Get iommu domain for iova to physical addr conversion */
+ nic->iommu_domain = iommu_get_domain_for_dev(dev);
+
+ pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+ if (sdevid == 0xA134)
+ nic->t88 = true;
+
+ /* Check if this VF is in QS only mode */
+ if (nic->sqs_mode)
+ return 0;
+
+ err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues);
+ if (err)
+ goto err_unregister_interrupts;
+
+ netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_SG |
+ NETIF_F_TSO | NETIF_F_GRO | NETIF_F_TSO6 |
+ NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+ NETIF_F_HW_VLAN_CTAG_RX);
+
+ netdev->hw_features |= NETIF_F_RXHASH;
+
+ netdev->features |= netdev->hw_features;
+ netdev->hw_features |= NETIF_F_LOOPBACK;
+
+ netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6;
+
+ netdev->netdev_ops = &nicvf_netdev_ops;
+ netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
+
+ /* MTU range: 64 - 9200 */
+ netdev->min_mtu = NIC_HW_MIN_FRS;
+ netdev->max_mtu = NIC_HW_MAX_FRS;
+
+ INIT_WORK(&nic->reset_task, nicvf_reset_task);
+
+ INIT_DELAYED_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
+ spin_lock_init(&nic->rx_mode_wq_lock);
+
+ err = register_netdev(netdev);
+ if (err) {
+ dev_err(dev, "Failed to register netdevice\n");
+ goto err_unregister_interrupts;
+ }
+
+ nic->msg_enable = debug;
+
+ nicvf_set_ethtool_ops(netdev);
+
+ return 0;
+
+err_unregister_interrupts:
+ nicvf_unregister_interrupts(nic);
+err_free_netdev:
+ pci_set_drvdata(pdev, NULL);
+ if (nic->drv_stats)
+ free_percpu(nic->drv_stats);
+ free_netdev(netdev);
+err_release_regions:
+ pci_release_regions(pdev);
+err_disable_device:
+ pci_disable_device(pdev);
+ return err;
+}
+
+static void nicvf_remove(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct nicvf *nic;
+ struct net_device *pnetdev;
+
+ if (!netdev)
+ return;
+
+ nic = netdev_priv(netdev);
+ pnetdev = nic->pnicvf->netdev;
+
+ cancel_delayed_work_sync(&nic->rx_mode_work.work);
+
+ /* Check if this Qset is assigned to different VF.
+ * If yes, clean primary and all secondary Qsets.
+ */
+ if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
+ unregister_netdev(pnetdev);
+ nicvf_unregister_interrupts(nic);
+ pci_set_drvdata(pdev, NULL);
+ if (nic->drv_stats)
+ free_percpu(nic->drv_stats);
+ cavium_ptp_put(nic->ptp_clock);
+ free_netdev(netdev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+static void nicvf_shutdown(struct pci_dev *pdev)
+{
+ nicvf_remove(pdev);
+}
+
+static struct pci_driver nicvf_driver = {
+ .name = DRV_NAME,
+ .id_table = nicvf_id_table,
+ .probe = nicvf_probe,
+ .remove = nicvf_remove,
+ .shutdown = nicvf_shutdown,
+};
+
+static int __init nicvf_init_module(void)
+{
+ pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
+ nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_generic",
+ WQ_MEM_RECLAIM);
+ return pci_register_driver(&nicvf_driver);
+}
+
+static void __exit nicvf_cleanup_module(void)
+{
+ if (nicvf_rx_mode_wq) {
+ destroy_workqueue(nicvf_rx_mode_wq);
+ nicvf_rx_mode_wq = NULL;
+ }
+ pci_unregister_driver(&nicvf_driver);
+}
+
+module_init(nicvf_init_module);
+module_exit(nicvf_cleanup_module);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
new file mode 100644
index 000000000..d9bcbe469
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -0,0 +1,1976 @@
+/*
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <linux/etherdevice.h>
+#include <linux/iommu.h>
+#include <net/ip.h>
+#include <net/tso.h>
+
+#include "nic_reg.h"
+#include "nic.h"
+#include "q_struct.h"
+#include "nicvf_queues.h"
+
+static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
+ int size, u64 data);
+static void nicvf_get_page(struct nicvf *nic)
+{
+ if (!nic->rb_pageref || !nic->rb_page)
+ return;
+
+ page_ref_add(nic->rb_page, nic->rb_pageref);
+ nic->rb_pageref = 0;
+}
+
+/* Poll a register for a specific value */
+static int nicvf_poll_reg(struct nicvf *nic, int qidx,
+ u64 reg, int bit_pos, int bits, int val)
+{
+ u64 bit_mask;
+ u64 reg_val;
+ int timeout = 10;
+
+ bit_mask = (1ULL << bits) - 1;
+ bit_mask = (bit_mask << bit_pos);
+
+ while (timeout) {
+ reg_val = nicvf_queue_reg_read(nic, reg, qidx);
+ if (((reg_val & bit_mask) >> bit_pos) == val)
+ return 0;
+ usleep_range(1000, 2000);
+ timeout--;
+ }
+ netdev_err(nic->netdev, "Poll on reg 0x%llx failed\n", reg);
+ return 1;
+}
+
+/* Allocate memory for a queue's descriptors */
+static int nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
+ int q_len, int desc_size, int align_bytes)
+{
+ dmem->q_len = q_len;
+ dmem->size = (desc_size * q_len) + align_bytes;
+ /* Save address, need it while freeing */
+ dmem->unalign_base = dma_zalloc_coherent(&nic->pdev->dev, dmem->size,
+ &dmem->dma, GFP_KERNEL);
+ if (!dmem->unalign_base)
+ return -ENOMEM;
+
+ /* Align memory address for 'align_bytes' */
+ dmem->phys_base = NICVF_ALIGNED_ADDR((u64)dmem->dma, align_bytes);
+ dmem->base = dmem->unalign_base + (dmem->phys_base - dmem->dma);
+ return 0;
+}
+
+/* Free queue's descriptor memory */
+static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
+{
+ if (!dmem)
+ return;
+
+ dma_free_coherent(&nic->pdev->dev, dmem->size,
+ dmem->unalign_base, dmem->dma);
+ dmem->unalign_base = NULL;
+ dmem->base = NULL;
+}
+
+#define XDP_PAGE_REFCNT_REFILL 256
+
+/* Allocate a new page or recycle one if possible
+ *
+ * We cannot optimize dma mapping here, since
+ * 1. It's only one RBDR ring for 8 Rx queues.
+ * 2. CQE_RX gives address of the buffer where pkt has been DMA'ed
+ * and not idx into RBDR ring, so can't refer to saved info.
+ * 3. There are multiple receive buffers per page
+ */
+static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic,
+ struct rbdr *rbdr, gfp_t gfp)
+{
+ int ref_count;
+ struct page *page = NULL;
+ struct pgcache *pgcache, *next;
+
+ /* Check if page is already allocated */
+ pgcache = &rbdr->pgcache[rbdr->pgidx];
+ page = pgcache->page;
+ /* Check if page can be recycled */
+ if (page) {
+ ref_count = page_ref_count(page);
+ /* This page can be recycled if internal ref_count and page's
+ * ref_count are equal, indicating that the page has been used
+ * once for packet transmission. For non-XDP mode, internal
+ * ref_count is always '1'.
+ */
+ if (rbdr->is_xdp) {
+ if (ref_count == pgcache->ref_count)
+ pgcache->ref_count--;
+ else
+ page = NULL;
+ } else if (ref_count != 1) {
+ page = NULL;
+ }
+ }
+
+ if (!page) {
+ page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0);
+ if (!page)
+ return NULL;
+
+ this_cpu_inc(nic->pnicvf->drv_stats->page_alloc);
+
+ /* Check for space */
+ if (rbdr->pgalloc >= rbdr->pgcnt) {
+ /* Page can still be used */
+ nic->rb_page = page;
+ return NULL;
+ }
+
+ /* Save the page in page cache */
+ pgcache->page = page;
+ pgcache->dma_addr = 0;
+ pgcache->ref_count = 0;
+ rbdr->pgalloc++;
+ }
+
+ /* Take additional page references for recycling */
+ if (rbdr->is_xdp) {
+ /* Since there is single RBDR (i.e single core doing
+ * page recycling) per 8 Rx queues, in XDP mode adjusting
+ * page references atomically is the biggest bottleneck, so
+ * take bunch of references at a time.
+ *
+ * So here, below reference counts defer by '1'.
+ */
+ if (!pgcache->ref_count) {
+ pgcache->ref_count = XDP_PAGE_REFCNT_REFILL;
+ page_ref_add(page, XDP_PAGE_REFCNT_REFILL);
+ }
+ } else {
+ /* In non-XDP case, single 64K page is divided across multiple
+ * receive buffers, so cost of recycling is less anyway.
+ * So we can do with just one extra reference.
+ */
+ page_ref_add(page, 1);
+ }
+
+ rbdr->pgidx++;
+ rbdr->pgidx &= (rbdr->pgcnt - 1);
+
+ /* Prefetch refcount of next page in page cache */
+ next = &rbdr->pgcache[rbdr->pgidx];
+ page = next->page;
+ if (page)
+ prefetch(&page->_refcount);
+
+ return pgcache;
+}
+
+/* Allocate buffer for packet reception */
+static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
+ gfp_t gfp, u32 buf_len, u64 *rbuf)
+{
+ struct pgcache *pgcache = NULL;
+
+ /* Check if request can be accomodated in previous allocated page.
+ * But in XDP mode only one buffer per page is permitted.
+ */
+ if (!rbdr->is_xdp && nic->rb_page &&
+ ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) {
+ nic->rb_pageref++;
+ goto ret;
+ }
+
+ nicvf_get_page(nic);
+ nic->rb_page = NULL;
+
+ /* Get new page, either recycled or new one */
+ pgcache = nicvf_alloc_page(nic, rbdr, gfp);
+ if (!pgcache && !nic->rb_page) {
+ this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
+ return -ENOMEM;
+ }
+
+ nic->rb_page_offset = 0;
+
+ /* Reserve space for header modifications by BPF program */
+ if (rbdr->is_xdp)
+ buf_len += XDP_PACKET_HEADROOM;
+
+ /* Check if it's recycled */
+ if (pgcache)
+ nic->rb_page = pgcache->page;
+ret:
+ if (rbdr->is_xdp && pgcache && pgcache->dma_addr) {
+ *rbuf = pgcache->dma_addr;
+ } else {
+ /* HW will ensure data coherency, CPU sync not required */
+ *rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
+ nic->rb_page_offset, buf_len,
+ DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
+ if (!nic->rb_page_offset)
+ __free_pages(nic->rb_page, 0);
+ nic->rb_page = NULL;
+ return -ENOMEM;
+ }
+ if (pgcache)
+ pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM;
+ nic->rb_page_offset += buf_len;
+ }
+
+ return 0;
+}
+
+/* Build skb around receive buffer */
+static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic,
+ u64 rb_ptr, int len)
+{
+ void *data;
+ struct sk_buff *skb;
+
+ data = phys_to_virt(rb_ptr);
+
+ /* Now build an skb to give to stack */
+ skb = build_skb(data, RCV_FRAG_LEN);
+ if (!skb) {
+ put_page(virt_to_page(data));
+ return NULL;
+ }
+
+ prefetch(skb->data);
+ return skb;
+}
+
+/* Allocate RBDR ring and populate receive buffers */
+static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
+ int ring_len, int buf_size)
+{
+ int idx;
+ u64 rbuf;
+ struct rbdr_entry_t *desc;
+ int err;
+
+ err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
+ sizeof(struct rbdr_entry_t),
+ NICVF_RCV_BUF_ALIGN_BYTES);
+ if (err)
+ return err;
+
+ rbdr->desc = rbdr->dmem.base;
+ /* Buffer size has to be in multiples of 128 bytes */
+ rbdr->dma_size = buf_size;
+ rbdr->enable = true;
+ rbdr->thresh = RBDR_THRESH;
+ rbdr->head = 0;
+ rbdr->tail = 0;
+
+ /* Initialize page recycling stuff.
+ *
+ * Can't use single buffer per page especially with 64K pages.
+ * On embedded platforms i.e 81xx/83xx available memory itself
+ * is low and minimum ring size of RBDR is 8K, that takes away
+ * lots of memory.
+ *
+ * But for XDP it has to be a single buffer per page.
+ */
+ if (!nic->pnicvf->xdp_prog) {
+ rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size);
+ rbdr->is_xdp = false;
+ } else {
+ rbdr->pgcnt = ring_len;
+ rbdr->is_xdp = true;
+ }
+ rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt);
+ rbdr->pgcache = kcalloc(rbdr->pgcnt, sizeof(*rbdr->pgcache),
+ GFP_KERNEL);
+ if (!rbdr->pgcache)
+ return -ENOMEM;
+ rbdr->pgidx = 0;
+ rbdr->pgalloc = 0;
+
+ nic->rb_page = NULL;
+ for (idx = 0; idx < ring_len; idx++) {
+ err = nicvf_alloc_rcv_buffer(nic, rbdr, GFP_KERNEL,
+ RCV_FRAG_LEN, &rbuf);
+ if (err) {
+ /* To free already allocated and mapped ones */
+ rbdr->tail = idx - 1;
+ return err;
+ }
+
+ desc = GET_RBDR_DESC(rbdr, idx);
+ desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
+ }
+
+ nicvf_get_page(nic);
+
+ return 0;
+}
+
+/* Free RBDR ring and its receive buffers */
+static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
+{
+ int head, tail;
+ u64 buf_addr, phys_addr;
+ struct pgcache *pgcache;
+ struct rbdr_entry_t *desc;
+
+ if (!rbdr)
+ return;
+
+ rbdr->enable = false;
+ if (!rbdr->dmem.base)
+ return;
+
+ head = rbdr->head;
+ tail = rbdr->tail;
+
+ /* Release page references */
+ while (head != tail) {
+ desc = GET_RBDR_DESC(rbdr, head);
+ buf_addr = desc->buf_addr;
+ phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+ dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+ DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ if (phys_addr)
+ put_page(virt_to_page(phys_to_virt(phys_addr)));
+ head++;
+ head &= (rbdr->dmem.q_len - 1);
+ }
+ /* Release buffer of tail desc */
+ desc = GET_RBDR_DESC(rbdr, tail);
+ buf_addr = desc->buf_addr;
+ phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+ dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+ DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ if (phys_addr)
+ put_page(virt_to_page(phys_to_virt(phys_addr)));
+
+ /* Sync page cache info */
+ smp_rmb();
+
+ /* Release additional page references held for recycling */
+ head = 0;
+ while (head < rbdr->pgcnt) {
+ pgcache = &rbdr->pgcache[head];
+ if (pgcache->page && page_ref_count(pgcache->page) != 0) {
+ if (rbdr->is_xdp) {
+ page_ref_sub(pgcache->page,
+ pgcache->ref_count - 1);
+ }
+ put_page(pgcache->page);
+ }
+ head++;
+ }
+
+ /* Free RBDR ring */
+ nicvf_free_q_desc_mem(nic, &rbdr->dmem);
+}
+
+/* Refill receive buffer descriptors with new buffers.
+ */
+static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp)
+{
+ struct queue_set *qs = nic->qs;
+ int rbdr_idx = qs->rbdr_cnt;
+ int tail, qcount;
+ int refill_rb_cnt;
+ struct rbdr *rbdr;
+ struct rbdr_entry_t *desc;
+ u64 rbuf;
+ int new_rb = 0;
+
+refill:
+ if (!rbdr_idx)
+ return;
+ rbdr_idx--;
+ rbdr = &qs->rbdr[rbdr_idx];
+ /* Check if it's enabled */
+ if (!rbdr->enable)
+ goto next_rbdr;
+
+ /* Get no of desc's to be refilled */
+ qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
+ qcount &= 0x7FFFF;
+ /* Doorbell can be ringed with a max of ring size minus 1 */
+ if (qcount >= (qs->rbdr_len - 1))
+ goto next_rbdr;
+ else
+ refill_rb_cnt = qs->rbdr_len - qcount - 1;
+
+ /* Sync page cache info */
+ smp_rmb();
+
+ /* Start filling descs from tail */
+ tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
+ while (refill_rb_cnt) {
+ tail++;
+ tail &= (rbdr->dmem.q_len - 1);
+
+ if (nicvf_alloc_rcv_buffer(nic, rbdr, gfp, RCV_FRAG_LEN, &rbuf))
+ break;
+
+ desc = GET_RBDR_DESC(rbdr, tail);
+ desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
+ refill_rb_cnt--;
+ new_rb++;
+ }
+
+ nicvf_get_page(nic);
+
+ /* make sure all memory stores are done before ringing doorbell */
+ smp_wmb();
+
+ /* Check if buffer allocation failed */
+ if (refill_rb_cnt)
+ nic->rb_alloc_fail = true;
+ else
+ nic->rb_alloc_fail = false;
+
+ /* Notify HW */
+ nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
+ rbdr_idx, new_rb);
+next_rbdr:
+ /* Re-enable RBDR interrupts only if buffer allocation is success */
+ if (!nic->rb_alloc_fail && rbdr->enable &&
+ netif_running(nic->pnicvf->netdev))
+ nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);
+
+ if (rbdr_idx)
+ goto refill;
+}
+
+/* Alloc rcv buffers in non-atomic mode for better success */
+void nicvf_rbdr_work(struct work_struct *work)
+{
+ struct nicvf *nic = container_of(work, struct nicvf, rbdr_work.work);
+
+ nicvf_refill_rbdr(nic, GFP_KERNEL);
+ if (nic->rb_alloc_fail)
+ schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
+ else
+ nic->rb_work_scheduled = false;
+}
+
+/* In Softirq context, alloc rcv buffers in atomic mode */
+void nicvf_rbdr_task(unsigned long data)
+{
+ struct nicvf *nic = (struct nicvf *)data;
+
+ nicvf_refill_rbdr(nic, GFP_ATOMIC);
+ if (nic->rb_alloc_fail) {
+ nic->rb_work_scheduled = true;
+ schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
+ }
+}
+
+/* Initialize completion queue */
+static int nicvf_init_cmp_queue(struct nicvf *nic,
+ struct cmp_queue *cq, int q_len)
+{
+ int err;
+
+ err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
+ NICVF_CQ_BASE_ALIGN_BYTES);
+ if (err)
+ return err;
+
+ cq->desc = cq->dmem.base;
+ cq->thresh = pass1_silicon(nic->pdev) ? 0 : CMP_QUEUE_CQE_THRESH;
+ nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;
+
+ return 0;
+}
+
+static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
+{
+ if (!cq)
+ return;
+ if (!cq->dmem.base)
+ return;
+
+ nicvf_free_q_desc_mem(nic, &cq->dmem);
+}
+
+/* Initialize transmit queue */
+static int nicvf_init_snd_queue(struct nicvf *nic,
+ struct snd_queue *sq, int q_len, int qidx)
+{
+ int err;
+
+ err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
+ NICVF_SQ_BASE_ALIGN_BYTES);
+ if (err)
+ return err;
+
+ sq->desc = sq->dmem.base;
+ sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
+ if (!sq->skbuff)
+ return -ENOMEM;
+
+ sq->head = 0;
+ sq->tail = 0;
+ sq->thresh = SND_QUEUE_THRESH;
+
+ /* Check if this SQ is a XDP TX queue */
+ if (nic->sqs_mode)
+ qidx += ((nic->sqs_id + 1) * MAX_SND_QUEUES_PER_QS);
+ if (qidx < nic->pnicvf->xdp_tx_queues) {
+ /* Alloc memory to save page pointers for XDP_TX */
+ sq->xdp_page = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
+ if (!sq->xdp_page)
+ return -ENOMEM;
+ sq->xdp_desc_cnt = 0;
+ sq->xdp_free_cnt = q_len - 1;
+ sq->is_xdp = true;
+ } else {
+ sq->xdp_page = NULL;
+ sq->xdp_desc_cnt = 0;
+ sq->xdp_free_cnt = 0;
+ sq->is_xdp = false;
+
+ atomic_set(&sq->free_cnt, q_len - 1);
+
+ /* Preallocate memory for TSO segment's header */
+ sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev,
+ q_len * TSO_HEADER_SIZE,
+ &sq->tso_hdrs_phys,
+ GFP_KERNEL);
+ if (!sq->tso_hdrs)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+ int hdr_sqe, u8 subdesc_cnt)
+{
+ u8 idx;
+ struct sq_gather_subdesc *gather;
+
+ /* Unmap DMA mapped skb data buffers */
+ for (idx = 0; idx < subdesc_cnt; idx++) {
+ hdr_sqe++;
+ hdr_sqe &= (sq->dmem.q_len - 1);
+ gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe);
+ /* HW will ensure data coherency, CPU sync not required */
+ dma_unmap_page_attrs(&nic->pdev->dev, gather->addr,
+ gather->size, DMA_TO_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+}
+
+static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
+{
+ struct sk_buff *skb;
+ struct page *page;
+ struct sq_hdr_subdesc *hdr;
+ struct sq_hdr_subdesc *tso_sqe;
+
+ if (!sq)
+ return;
+ if (!sq->dmem.base)
+ return;
+
+ if (sq->tso_hdrs) {
+ dma_free_coherent(&nic->pdev->dev,
+ sq->dmem.q_len * TSO_HEADER_SIZE,
+ sq->tso_hdrs, sq->tso_hdrs_phys);
+ sq->tso_hdrs = NULL;
+ }
+
+ /* Free pending skbs in the queue */
+ smp_rmb();
+ while (sq->head != sq->tail) {
+ skb = (struct sk_buff *)sq->skbuff[sq->head];
+ if (!skb || !sq->xdp_page)
+ goto next;
+
+ page = (struct page *)sq->xdp_page[sq->head];
+ if (!page)
+ goto next;
+ else
+ put_page(page);
+
+ hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
+ /* Check for dummy descriptor used for HW TSO offload on 88xx */
+ if (hdr->dont_send) {
+ /* Get actual TSO descriptors and unmap them */
+ tso_sqe =
+ (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+ nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+ tso_sqe->subdesc_cnt);
+ } else {
+ nicvf_unmap_sndq_buffers(nic, sq, sq->head,
+ hdr->subdesc_cnt);
+ }
+ if (skb)
+ dev_kfree_skb_any(skb);
+next:
+ sq->head++;
+ sq->head &= (sq->dmem.q_len - 1);
+ }
+ kfree(sq->skbuff);
+ kfree(sq->xdp_page);
+ nicvf_free_q_desc_mem(nic, &sq->dmem);
+}
+
+static void nicvf_reclaim_snd_queue(struct nicvf *nic,
+ struct queue_set *qs, int qidx)
+{
+ /* Disable send queue */
+ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
+ /* Check if SQ is stopped */
+ if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
+ return;
+ /* Reset send queue */
+ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
+}
+
+static void nicvf_reclaim_rcv_queue(struct nicvf *nic,
+ struct queue_set *qs, int qidx)
+{
+ union nic_mbx mbx = {};
+
+ /* Make sure all packets in the pipeline are written back into mem */
+ mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
+ nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_reclaim_cmp_queue(struct nicvf *nic,
+ struct queue_set *qs, int qidx)
+{
+ /* Disable timer threshold (doesn't get reset upon CQ reset */
+ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
+ /* Disable completion queue */
+ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
+ /* Reset completion queue */
+ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
+}
+
+static void nicvf_reclaim_rbdr(struct nicvf *nic,
+ struct rbdr *rbdr, int qidx)
+{
+ u64 tmp, fifo_state;
+ int timeout = 10;
+
+ /* Save head and tail pointers for feeing up buffers */
+ rbdr->head = nicvf_queue_reg_read(nic,
+ NIC_QSET_RBDR_0_1_HEAD,
+ qidx) >> 3;
+ rbdr->tail = nicvf_queue_reg_read(nic,
+ NIC_QSET_RBDR_0_1_TAIL,
+ qidx) >> 3;
+
+ /* If RBDR FIFO is in 'FAIL' state then do a reset first
+ * before relaiming.
+ */
+ fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
+ if (((fifo_state >> 62) & 0x03) == 0x3)
+ nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
+ qidx, NICVF_RBDR_RESET);
+
+ /* Disable RBDR */
+ nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
+ if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
+ return;
+ while (1) {
+ tmp = nicvf_queue_reg_read(nic,
+ NIC_QSET_RBDR_0_1_PREFETCH_STATUS,
+ qidx);
+ if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
+ break;
+ usleep_range(1000, 2000);
+ timeout--;
+ if (!timeout) {
+ netdev_err(nic->netdev,
+ "Failed polling on prefetch status\n");
+ return;
+ }
+ }
+ nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
+ qidx, NICVF_RBDR_RESET);
+
+ if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
+ return;
+ nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
+ if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
+ return;
+}
+
+void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features)
+{
+ u64 rq_cfg;
+ int sqs;
+
+ rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0);
+
+ /* Enable first VLAN stripping */
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ rq_cfg |= (1ULL << 25);
+ else
+ rq_cfg &= ~(1ULL << 25);
+ nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
+
+ /* Configure Secondary Qsets, if any */
+ for (sqs = 0; sqs < nic->sqs_count; sqs++)
+ if (nic->snicvf[sqs])
+ nicvf_queue_reg_write(nic->snicvf[sqs],
+ NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
+}
+
+static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ /* Reset all RQ/SQ and VF stats */
+ mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
+ mbx.reset_stat.rx_stat_mask = 0x3FFF;
+ mbx.reset_stat.tx_stat_mask = 0x1F;
+ mbx.reset_stat.rq_stat_mask = 0xFFFF;
+ mbx.reset_stat.sq_stat_mask = 0xFFFF;
+ nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+/* Configures receive queue */
+static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
+ int qidx, bool enable)
+{
+ union nic_mbx mbx = {};
+ struct rcv_queue *rq;
+ struct rq_cfg rq_cfg;
+
+ rq = &qs->rq[qidx];
+ rq->enable = enable;
+
+ /* Disable receive queue */
+ nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);
+
+ if (!rq->enable) {
+ nicvf_reclaim_rcv_queue(nic, qs, qidx);
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+ return;
+ }
+
+ rq->cq_qs = qs->vnic_id;
+ rq->cq_idx = qidx;
+ rq->start_rbdr_qs = qs->vnic_id;
+ rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
+ rq->cont_rbdr_qs = qs->vnic_id;
+ rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
+ /* all writes of RBDR data to be loaded into L2 Cache as well*/
+ rq->caching = 1;
+
+ /* Driver have no proper error path for failed XDP RX-queue info reg */
+ WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx) < 0);
+
+ /* Send a mailbox msg to PF to config RQ */
+ mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
+ mbx.rq.qs_num = qs->vnic_id;
+ mbx.rq.rq_num = qidx;
+ mbx.rq.cfg = ((u64)rq->caching << 26) | (rq->cq_qs << 19) |
+ (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
+ (rq->cont_qs_rbdr_idx << 8) |
+ (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx);
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
+ mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
+ (RQ_PASS_RBDR_LVL << 16) | (RQ_PASS_CQ_LVL << 8) |
+ (qs->vnic_id << 0);
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ /* RQ drop config
+ * Enable CQ drop to reserve sufficient CQEs for all tx packets
+ */
+ mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
+ mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
+ (RQ_PASS_RBDR_LVL << 40) | (RQ_DROP_RBDR_LVL << 32) |
+ (RQ_PASS_CQ_LVL << 16) | (RQ_DROP_CQ_LVL << 8);
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ if (!nic->sqs_mode && (qidx == 0)) {
+ /* Enable checking L3/L4 length and TCP/UDP checksums
+ * Also allow IPv6 pkts with zero UDP checksum.
+ */
+ nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
+ (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
+ nicvf_config_vlan_stripping(nic, nic->netdev->features);
+ }
+
+ /* Enable Receive queue */
+ memset(&rq_cfg, 0, sizeof(struct rq_cfg));
+ rq_cfg.ena = 1;
+ rq_cfg.tcp_ena = 0;
+ nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg);
+}
+
+/* Configures completion queue */
+void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
+ int qidx, bool enable)
+{
+ struct cmp_queue *cq;
+ struct cq_cfg cq_cfg;
+
+ cq = &qs->cq[qidx];
+ cq->enable = enable;
+
+ if (!cq->enable) {
+ nicvf_reclaim_cmp_queue(nic, qs, qidx);
+ return;
+ }
+
+ /* Reset completion queue */
+ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
+
+ if (!cq->enable)
+ return;
+
+ spin_lock_init(&cq->lock);
+ /* Set completion queue base address */
+ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE,
+ qidx, (u64)(cq->dmem.phys_base));
+
+ /* Enable Completion queue */
+ memset(&cq_cfg, 0, sizeof(struct cq_cfg));
+ cq_cfg.ena = 1;
+ cq_cfg.reset = 0;
+ cq_cfg.caching = 0;
+ cq_cfg.qsize = ilog2(qs->cq_len >> 10);
+ cq_cfg.avg_con = 0;
+ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(u64 *)&cq_cfg);
+
+ /* Set threshold value for interrupt generation */
+ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
+ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2,
+ qidx, CMP_QUEUE_TIMER_THRESH);
+}
+
+/* Configures transmit queue */
+static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
+ int qidx, bool enable)
+{
+ union nic_mbx mbx = {};
+ struct snd_queue *sq;
+ struct sq_cfg sq_cfg;
+
+ sq = &qs->sq[qidx];
+ sq->enable = enable;
+
+ if (!sq->enable) {
+ nicvf_reclaim_snd_queue(nic, qs, qidx);
+ return;
+ }
+
+ /* Reset send queue */
+ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
+
+ sq->cq_qs = qs->vnic_id;
+ sq->cq_idx = qidx;
+
+ /* Send a mailbox msg to PF to config SQ */
+ mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
+ mbx.sq.qs_num = qs->vnic_id;
+ mbx.sq.sq_num = qidx;
+ mbx.sq.sqs_mode = nic->sqs_mode;
+ mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ /* Set queue base address */
+ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE,
+ qidx, (u64)(sq->dmem.phys_base));
+
+ /* Enable send queue & set queue size */
+ memset(&sq_cfg, 0, sizeof(struct sq_cfg));
+ sq_cfg.ena = 1;
+ sq_cfg.reset = 0;
+ sq_cfg.ldwb = 0;
+ sq_cfg.qsize = ilog2(qs->sq_len >> 10);
+ sq_cfg.tstmp_bgx_intf = 0;
+ /* CQ's level at which HW will stop processing SQEs to avoid
+ * transmitting a pkt with no space in CQ to post CQE_TX.
+ */
+ sq_cfg.cq_limit = (CMP_QUEUE_PIPELINE_RSVD * 256) / qs->cq_len;
+ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg);
+
+ /* Set threshold value for interrupt generation */
+ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);
+
+ /* Set queue:cpu affinity for better load distribution */
+ if (cpu_online(qidx)) {
+ cpumask_set_cpu(qidx, &sq->affinity_mask);
+ netif_set_xps_queue(nic->netdev,
+ &sq->affinity_mask, qidx);
+ }
+}
+
+/* Configures receive buffer descriptor ring */
+static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs,
+ int qidx, bool enable)
+{
+ struct rbdr *rbdr;
+ struct rbdr_cfg rbdr_cfg;
+
+ rbdr = &qs->rbdr[qidx];
+ nicvf_reclaim_rbdr(nic, rbdr, qidx);
+ if (!enable)
+ return;
+
+ /* Set descriptor base address */
+ nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE,
+ qidx, (u64)(rbdr->dmem.phys_base));
+
+ /* Enable RBDR & set queue size */
+ /* Buffer size should be in multiples of 128 bytes */
+ memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg));
+ rbdr_cfg.ena = 1;
+ rbdr_cfg.reset = 0;
+ rbdr_cfg.ldwb = 0;
+ rbdr_cfg.qsize = RBDR_SIZE;
+ rbdr_cfg.avg_con = 0;
+ rbdr_cfg.lines = rbdr->dma_size / 128;
+ nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
+ qidx, *(u64 *)&rbdr_cfg);
+
+ /* Notify HW */
+ nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
+ qidx, qs->rbdr_len - 1);
+
+ /* Set threshold value for interrupt generation */
+ nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH,
+ qidx, rbdr->thresh - 1);
+}
+
+/* Requests PF to assign and enable Qset */
+void nicvf_qset_config(struct nicvf *nic, bool enable)
+{
+ union nic_mbx mbx = {};
+ struct queue_set *qs = nic->qs;
+ struct qs_cfg *qs_cfg;
+
+ if (!qs) {
+ netdev_warn(nic->netdev,
+ "Qset is still not allocated, don't init queues\n");
+ return;
+ }
+
+ qs->enable = enable;
+ qs->vnic_id = nic->vf_id;
+
+ /* Send a mailbox msg to PF to config Qset */
+ mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
+ mbx.qs.num = qs->vnic_id;
+ mbx.qs.sqs_count = nic->sqs_count;
+
+ mbx.qs.cfg = 0;
+ qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
+ if (qs->enable) {
+ qs_cfg->ena = 1;
+#ifdef __BIG_ENDIAN
+ qs_cfg->be = 1;
+#endif
+ qs_cfg->vnic = qs->vnic_id;
+ /* Enable Tx timestamping capability */
+ if (nic->ptp_clock)
+ qs_cfg->send_tstmp_ena = 1;
+ }
+ nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_free_resources(struct nicvf *nic)
+{
+ int qidx;
+ struct queue_set *qs = nic->qs;
+
+ /* Free receive buffer descriptor ring */
+ for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
+ nicvf_free_rbdr(nic, &qs->rbdr[qidx]);
+
+ /* Free completion queue */
+ for (qidx = 0; qidx < qs->cq_cnt; qidx++)
+ nicvf_free_cmp_queue(nic, &qs->cq[qidx]);
+
+ /* Free send queue */
+ for (qidx = 0; qidx < qs->sq_cnt; qidx++)
+ nicvf_free_snd_queue(nic, &qs->sq[qidx]);
+}
+
+static int nicvf_alloc_resources(struct nicvf *nic)
+{
+ int qidx;
+ struct queue_set *qs = nic->qs;
+
+ /* Alloc receive buffer descriptor ring */
+ for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
+ if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
+ DMA_BUFFER_LEN))
+ goto alloc_fail;
+ }
+
+ /* Alloc send queue */
+ for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
+ if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
+ goto alloc_fail;
+ }
+
+ /* Alloc completion queue */
+ for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
+ if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len))
+ goto alloc_fail;
+ }
+
+ return 0;
+alloc_fail:
+ nicvf_free_resources(nic);
+ return -ENOMEM;
+}
+
+int nicvf_set_qset_resources(struct nicvf *nic)
+{
+ struct queue_set *qs;
+
+ qs = devm_kzalloc(&nic->pdev->dev, sizeof(*qs), GFP_KERNEL);
+ if (!qs)
+ return -ENOMEM;
+ nic->qs = qs;
+
+ /* Set count of each queue */
+ qs->rbdr_cnt = DEFAULT_RBDR_CNT;
+ qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus());
+ qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus());
+ qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt);
+
+ /* Set queue lengths */
+ qs->rbdr_len = RCV_BUF_COUNT;
+ qs->sq_len = SND_QUEUE_LEN;
+ qs->cq_len = CMP_QUEUE_LEN;
+
+ nic->rx_queues = qs->rq_cnt;
+ nic->tx_queues = qs->sq_cnt;
+ nic->xdp_tx_queues = 0;
+
+ return 0;
+}
+
+int nicvf_config_data_transfer(struct nicvf *nic, bool enable)
+{
+ bool disable = false;
+ struct queue_set *qs = nic->qs;
+ struct queue_set *pqs = nic->pnicvf->qs;
+ int qidx;
+
+ if (!qs)
+ return 0;
+
+ /* Take primary VF's queue lengths.
+ * This is needed to take queue lengths set from ethtool
+ * into consideration.
+ */
+ if (nic->sqs_mode && pqs) {
+ qs->cq_len = pqs->cq_len;
+ qs->sq_len = pqs->sq_len;
+ }
+
+ if (enable) {
+ if (nicvf_alloc_resources(nic))
+ return -ENOMEM;
+
+ for (qidx = 0; qidx < qs->sq_cnt; qidx++)
+ nicvf_snd_queue_config(nic, qs, qidx, enable);
+ for (qidx = 0; qidx < qs->cq_cnt; qidx++)
+ nicvf_cmp_queue_config(nic, qs, qidx, enable);
+ for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
+ nicvf_rbdr_config(nic, qs, qidx, enable);
+ for (qidx = 0; qidx < qs->rq_cnt; qidx++)
+ nicvf_rcv_queue_config(nic, qs, qidx, enable);
+ } else {
+ for (qidx = 0; qidx < qs->rq_cnt; qidx++)
+ nicvf_rcv_queue_config(nic, qs, qidx, disable);
+ for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
+ nicvf_rbdr_config(nic, qs, qidx, disable);
+ for (qidx = 0; qidx < qs->sq_cnt; qidx++)
+ nicvf_snd_queue_config(nic, qs, qidx, disable);
+ for (qidx = 0; qidx < qs->cq_cnt; qidx++)
+ nicvf_cmp_queue_config(nic, qs, qidx, disable);
+
+ nicvf_free_resources(nic);
+ }
+
+ /* Reset RXQ's stats.
+ * SQ's stats will get reset automatically once SQ is reset.
+ */
+ nicvf_reset_rcv_queue_stats(nic);
+
+ return 0;
+}
+
+/* Get a free desc from SQ
+ * returns descriptor ponter & descriptor number
+ */
+static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
+{
+ int qentry;
+
+ qentry = sq->tail;
+ if (!sq->is_xdp)
+ atomic_sub(desc_cnt, &sq->free_cnt);
+ else
+ sq->xdp_free_cnt -= desc_cnt;
+ sq->tail += desc_cnt;
+ sq->tail &= (sq->dmem.q_len - 1);
+
+ return qentry;
+}
+
+/* Rollback to previous tail pointer when descriptors not used */
+static inline void nicvf_rollback_sq_desc(struct snd_queue *sq,
+ int qentry, int desc_cnt)
+{
+ sq->tail = qentry;
+ atomic_add(desc_cnt, &sq->free_cnt);
+}
+
+/* Free descriptor back to SQ for future use */
+void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
+{
+ if (!sq->is_xdp)
+ atomic_add(desc_cnt, &sq->free_cnt);
+ else
+ sq->xdp_free_cnt += desc_cnt;
+ sq->head += desc_cnt;
+ sq->head &= (sq->dmem.q_len - 1);
+}
+
+static inline int nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
+{
+ qentry++;
+ qentry &= (sq->dmem.q_len - 1);
+ return qentry;
+}
+
+void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
+{
+ u64 sq_cfg;
+
+ sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
+ sq_cfg |= NICVF_SQ_EN;
+ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
+ /* Ring doorbell so that H/W restarts processing SQEs */
+ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
+}
+
+void nicvf_sq_disable(struct nicvf *nic, int qidx)
+{
+ u64 sq_cfg;
+
+ sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
+ sq_cfg &= ~NICVF_SQ_EN;
+ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
+}
+
+void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq,
+ int qidx)
+{
+ u64 head, tail;
+ struct sk_buff *skb;
+ struct nicvf *nic = netdev_priv(netdev);
+ struct sq_hdr_subdesc *hdr;
+
+ head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
+ tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4;
+ while (sq->head != head) {
+ hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
+ if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
+ nicvf_put_sq_desc(sq, 1);
+ continue;
+ }
+ skb = (struct sk_buff *)sq->skbuff[sq->head];
+ if (skb)
+ dev_kfree_skb_any(skb);
+ atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets);
+ atomic64_add(hdr->tot_len,
+ (atomic64_t *)&netdev->stats.tx_bytes);
+ nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
+ }
+}
+
+/* XDP Transmit APIs */
+void nicvf_xdp_sq_doorbell(struct nicvf *nic,
+ struct snd_queue *sq, int sq_num)
+{
+ if (!sq->xdp_desc_cnt)
+ return;
+
+ /* make sure all memory stores are done before ringing doorbell */
+ wmb();
+
+ /* Inform HW to xmit all TSO segments */
+ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
+ sq_num, sq->xdp_desc_cnt);
+ sq->xdp_desc_cnt = 0;
+}
+
+static inline void
+nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
+ int subdesc_cnt, u64 data, int len)
+{
+ struct sq_hdr_subdesc *hdr;
+
+ hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
+ memset(hdr, 0, SND_QUEUE_DESC_SIZE);
+ hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
+ hdr->subdesc_cnt = subdesc_cnt;
+ hdr->tot_len = len;
+ hdr->post_cqe = 1;
+ sq->xdp_page[qentry] = (u64)virt_to_page((void *)data);
+}
+
+int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
+ u64 bufaddr, u64 dma_addr, u16 len)
+{
+ int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
+ int qentry;
+
+ if (subdesc_cnt > sq->xdp_free_cnt)
+ return 0;
+
+ qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
+
+ nicvf_xdp_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, bufaddr, len);
+
+ qentry = nicvf_get_nxt_sqentry(sq, qentry);
+ nicvf_sq_add_gather_subdesc(sq, qentry, len, dma_addr);
+
+ sq->xdp_desc_cnt += subdesc_cnt;
+
+ return 1;
+}
+
+/* Calculate no of SQ subdescriptors needed to transmit all
+ * segments of this TSO packet.
+ * Taken from 'Tilera network driver' with a minor modification.
+ */
+static int nicvf_tso_count_subdescs(struct sk_buff *skb)
+{
+ struct skb_shared_info *sh = skb_shinfo(skb);
+ unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ unsigned int data_len = skb->len - sh_len;
+ unsigned int p_len = sh->gso_size;
+ long f_id = -1; /* id of the current fragment */
+ long f_size = skb_headlen(skb) - sh_len; /* current fragment size */
+ long f_used = 0; /* bytes used from the current fragment */
+ long n; /* size of the current piece of payload */
+ int num_edescs = 0;
+ int segment;
+
+ for (segment = 0; segment < sh->gso_segs; segment++) {
+ unsigned int p_used = 0;
+
+ /* One edesc for header and for each piece of the payload. */
+ for (num_edescs++; p_used < p_len; num_edescs++) {
+ /* Advance as needed. */
+ while (f_used >= f_size) {
+ f_id++;
+ f_size = skb_frag_size(&sh->frags[f_id]);
+ f_used = 0;
+ }
+
+ /* Use bytes from the current fragment. */
+ n = p_len - p_used;
+ if (n > f_size - f_used)
+ n = f_size - f_used;
+ f_used += n;
+ p_used += n;
+ }
+
+ /* The last segment may be less than gso_size. */
+ data_len -= p_len;
+ if (data_len < p_len)
+ p_len = data_len;
+ }
+
+ /* '+ gso_segs' for SQ_HDR_SUDESCs for each segment */
+ return num_edescs + sh->gso_segs;
+}
+
+#define POST_CQE_DESC_COUNT 2
+
+/* Get the number of SQ descriptors needed to xmit this skb */
+static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
+{
+ int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
+
+ if (skb_shinfo(skb)->gso_size && !nic->hw_tso) {
+ subdesc_cnt = nicvf_tso_count_subdescs(skb);
+ return subdesc_cnt;
+ }
+
+ /* Dummy descriptors to get TSO pkt completion notification */
+ if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size)
+ subdesc_cnt += POST_CQE_DESC_COUNT;
+
+ if (skb_shinfo(skb)->nr_frags)
+ subdesc_cnt += skb_shinfo(skb)->nr_frags;
+
+ return subdesc_cnt;
+}
+
+/* Add SQ HEADER subdescriptor.
+ * First subdescriptor for every send descriptor.
+ */
+static inline void
+nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
+ int subdesc_cnt, struct sk_buff *skb, int len)
+{
+ int proto;
+ struct sq_hdr_subdesc *hdr;
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
+
+ ip.hdr = skb_network_header(skb);
+ hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
+ memset(hdr, 0, SND_QUEUE_DESC_SIZE);
+ hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
+
+ if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) {
+ /* post_cqe = 0, to avoid HW posting a CQE for every TSO
+ * segment transmitted on 88xx.
+ */
+ hdr->subdesc_cnt = subdesc_cnt - POST_CQE_DESC_COUNT;
+ } else {
+ sq->skbuff[qentry] = (u64)skb;
+ /* Enable notification via CQE after processing SQE */
+ hdr->post_cqe = 1;
+ /* No of subdescriptors following this */
+ hdr->subdesc_cnt = subdesc_cnt;
+ }
+ hdr->tot_len = len;
+
+ /* Offload checksum calculation to HW */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ if (ip.v4->version == 4)
+ hdr->csum_l3 = 1; /* Enable IP csum calculation */
+ hdr->l3_offset = skb_network_offset(skb);
+ hdr->l4_offset = skb_transport_offset(skb);
+
+ proto = (ip.v4->version == 4) ? ip.v4->protocol :
+ ip.v6->nexthdr;
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ hdr->csum_l4 = SEND_L4_CSUM_TCP;
+ break;
+ case IPPROTO_UDP:
+ hdr->csum_l4 = SEND_L4_CSUM_UDP;
+ break;
+ case IPPROTO_SCTP:
+ hdr->csum_l4 = SEND_L4_CSUM_SCTP;
+ break;
+ }
+ }
+
+ if (nic->hw_tso && skb_shinfo(skb)->gso_size) {
+ hdr->tso = 1;
+ hdr->tso_start = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ hdr->tso_max_paysize = skb_shinfo(skb)->gso_size;
+ /* For non-tunneled pkts, point this to L2 ethertype */
+ hdr->inner_l3_offset = skb_network_offset(skb) - 2;
+ this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
+ }
+
+ /* Check if timestamp is requested */
+ if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+ skb_tx_timestamp(skb);
+ return;
+ }
+
+ /* Tx timestamping not supported along with TSO, so ignore request */
+ if (skb_shinfo(skb)->gso_size)
+ return;
+
+ /* HW supports only a single outstanding packet to timestamp */
+ if (!atomic_add_unless(&nic->pnicvf->tx_ptp_skbs, 1, 1))
+ return;
+
+ /* Mark the SKB for later reference */
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+ /* Finally enable timestamp generation
+ * Since 'post_cqe' is also set, two CQEs will be posted
+ * for this packet i.e CQE_TYPE_SEND and CQE_TYPE_SEND_PTP.
+ */
+ hdr->tstmp = 1;
+}
+
+/* SQ GATHER subdescriptor
+ * Must follow HDR descriptor
+ */
+static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
+ int size, u64 data)
+{
+ struct sq_gather_subdesc *gather;
+
+ qentry &= (sq->dmem.q_len - 1);
+ gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);
+
+ memset(gather, 0, SND_QUEUE_DESC_SIZE);
+ gather->subdesc_type = SQ_DESC_TYPE_GATHER;
+ gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
+ gather->size = size;
+ gather->addr = data;
+}
+
+/* Add HDR + IMMEDIATE subdescriptors right after descriptors of a TSO
+ * packet so that a CQE is posted as a notifation for transmission of
+ * TSO packet.
+ */
+static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry,
+ int tso_sqe, struct sk_buff *skb)
+{
+ struct sq_imm_subdesc *imm;
+ struct sq_hdr_subdesc *hdr;
+
+ sq->skbuff[qentry] = (u64)skb;
+
+ hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
+ memset(hdr, 0, SND_QUEUE_DESC_SIZE);
+ hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
+ /* Enable notification via CQE after processing SQE */
+ hdr->post_cqe = 1;
+ /* There is no packet to transmit here */
+ hdr->dont_send = 1;
+ hdr->subdesc_cnt = POST_CQE_DESC_COUNT - 1;
+ hdr->tot_len = 1;
+ /* Actual TSO header SQE index, needed for cleanup */
+ hdr->rsvd2 = tso_sqe;
+
+ qentry = nicvf_get_nxt_sqentry(sq, qentry);
+ imm = (struct sq_imm_subdesc *)GET_SQ_DESC(sq, qentry);
+ memset(imm, 0, SND_QUEUE_DESC_SIZE);
+ imm->subdesc_type = SQ_DESC_TYPE_IMMEDIATE;
+ imm->len = 1;
+}
+
+static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb,
+ int sq_num, int desc_cnt)
+{
+ struct netdev_queue *txq;
+
+ txq = netdev_get_tx_queue(nic->pnicvf->netdev,
+ skb_get_queue_mapping(skb));
+
+ netdev_tx_sent_queue(txq, skb->len);
+
+ /* make sure all memory stores are done before ringing doorbell */
+ smp_wmb();
+
+ /* Inform HW to xmit all TSO segments */
+ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
+ sq_num, desc_cnt);
+}
+
+/* Segment a TSO packet into 'gso_size' segments and append
+ * them to SQ for transfer
+ */
+static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
+ int sq_num, int qentry, struct sk_buff *skb)
+{
+ struct tso_t tso;
+ int seg_subdescs = 0, desc_cnt = 0;
+ int seg_len, total_len, data_left;
+ int hdr_qentry = qentry;
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+ tso_start(skb, &tso);
+ total_len = skb->len - hdr_len;
+ while (total_len > 0) {
+ char *hdr;
+
+ /* Save Qentry for adding HDR_SUBDESC at the end */
+ hdr_qentry = qentry;
+
+ data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+ total_len -= data_left;
+
+ /* Add segment's header */
+ qentry = nicvf_get_nxt_sqentry(sq, qentry);
+ hdr = sq->tso_hdrs + qentry * TSO_HEADER_SIZE;
+ tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+ nicvf_sq_add_gather_subdesc(sq, qentry, hdr_len,
+ sq->tso_hdrs_phys +
+ qentry * TSO_HEADER_SIZE);
+ /* HDR_SUDESC + GATHER */
+ seg_subdescs = 2;
+ seg_len = hdr_len;
+
+ /* Add segment's payload fragments */
+ while (data_left > 0) {
+ int size;
+
+ size = min_t(int, tso.size, data_left);
+
+ qentry = nicvf_get_nxt_sqentry(sq, qentry);
+ nicvf_sq_add_gather_subdesc(sq, qentry, size,
+ virt_to_phys(tso.data));
+ seg_subdescs++;
+ seg_len += size;
+
+ data_left -= size;
+ tso_build_data(skb, &tso, size);
+ }
+ nicvf_sq_add_hdr_subdesc(nic, sq, hdr_qentry,
+ seg_subdescs - 1, skb, seg_len);
+ sq->skbuff[hdr_qentry] = (u64)NULL;
+ qentry = nicvf_get_nxt_sqentry(sq, qentry);
+
+ desc_cnt += seg_subdescs;
+ }
+ /* Save SKB in the last segment for freeing */
+ sq->skbuff[hdr_qentry] = (u64)skb;
+
+ nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt);
+
+ this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
+ return 1;
+}
+
+/* Append an skb to a SQ for packet transfer. */
+int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
+ struct sk_buff *skb, u8 sq_num)
+{
+ int i, size;
+ int subdesc_cnt, hdr_sqe = 0;
+ int qentry;
+ u64 dma_addr;
+
+ subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
+ if (subdesc_cnt > atomic_read(&sq->free_cnt))
+ goto append_fail;
+
+ qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
+
+ /* Check if its a TSO packet */
+ if (skb_shinfo(skb)->gso_size && !nic->hw_tso)
+ return nicvf_sq_append_tso(nic, sq, sq_num, qentry, skb);
+
+ /* Add SQ header subdesc */
+ nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
+ skb, skb->len);
+ hdr_sqe = qentry;
+
+ /* Add SQ gather subdescs */
+ qentry = nicvf_get_nxt_sqentry(sq, qentry);
+ size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
+ /* HW will ensure data coherency, CPU sync not required */
+ dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data),
+ offset_in_page(skb->data), size,
+ DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+ nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+ return 0;
+ }
+
+ nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
+
+ /* Check for scattered buffer */
+ if (!skb_is_nonlinear(skb))
+ goto doorbell;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ const struct skb_frag_struct *frag;
+
+ frag = &skb_shinfo(skb)->frags[i];
+
+ qentry = nicvf_get_nxt_sqentry(sq, qentry);
+ size = skb_frag_size(frag);
+ dma_addr = dma_map_page_attrs(&nic->pdev->dev,
+ skb_frag_page(frag),
+ frag->page_offset, size,
+ DMA_TO_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+ /* Free entire chain of mapped buffers
+ * here 'i' = frags mapped + above mapped skb->data
+ */
+ nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i);
+ nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+ return 0;
+ }
+ nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
+ }
+
+doorbell:
+ if (nic->t88 && skb_shinfo(skb)->gso_size) {
+ qentry = nicvf_get_nxt_sqentry(sq, qentry);
+ nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb);
+ }
+
+ nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
+
+ return 1;
+
+append_fail:
+ /* Use original PCI dev for debug log */
+ nic = nic->pnicvf;
+ netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n");
+ return 0;
+}
+
+static inline unsigned frag_num(unsigned i)
+{
+#ifdef __BIG_ENDIAN
+ return (i & ~3) + 3 - (i & 3);
+#else
+ return i;
+#endif
+}
+
+static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr,
+ u64 buf_addr, bool xdp)
+{
+ struct page *page = NULL;
+ int len = RCV_FRAG_LEN;
+
+ if (xdp) {
+ page = virt_to_page(phys_to_virt(buf_addr));
+ /* Check if it's a recycled page, if not
+ * unmap the DMA mapping.
+ *
+ * Recycled page holds an extra reference.
+ */
+ if (page_ref_count(page) != 1)
+ return;
+
+ len += XDP_PACKET_HEADROOM;
+ /* Receive buffers in XDP mode are mapped from page start */
+ dma_addr &= PAGE_MASK;
+ }
+ dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, len,
+ DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+/* Returns SKB for a received packet */
+struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic,
+ struct cqe_rx_t *cqe_rx, bool xdp)
+{
+ int frag;
+ int payload_len = 0;
+ struct sk_buff *skb = NULL;
+ struct page *page;
+ int offset;
+ u16 *rb_lens = NULL;
+ u64 *rb_ptrs = NULL;
+ u64 phys_addr;
+
+ rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
+ /* Except 88xx pass1 on all other chips CQE_RX2_S is added to
+ * CQE_RX at word6, hence buffer pointers move by word
+ *
+ * Use existing 'hw_tso' flag which will be set for all chips
+ * except 88xx pass1 instead of a additional cache line
+ * access (or miss) by using pci dev's revision.
+ */
+ if (!nic->hw_tso)
+ rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
+ else
+ rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
+
+ for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
+ payload_len = rb_lens[frag_num(frag)];
+ phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs);
+ if (!phys_addr) {
+ if (skb)
+ dev_kfree_skb_any(skb);
+ return NULL;
+ }
+
+ if (!frag) {
+ /* First fragment */
+ nicvf_unmap_rcv_buffer(nic,
+ *rb_ptrs - cqe_rx->align_pad,
+ phys_addr, xdp);
+ skb = nicvf_rb_ptr_to_skb(nic,
+ phys_addr - cqe_rx->align_pad,
+ payload_len);
+ if (!skb)
+ return NULL;
+ skb_reserve(skb, cqe_rx->align_pad);
+ skb_put(skb, payload_len);
+ } else {
+ /* Add fragments */
+ nicvf_unmap_rcv_buffer(nic, *rb_ptrs, phys_addr, xdp);
+ page = virt_to_page(phys_to_virt(phys_addr));
+ offset = phys_to_virt(phys_addr) - page_address(page);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ offset, payload_len, RCV_FRAG_LEN);
+ }
+ /* Next buffer pointer */
+ rb_ptrs++;
+ }
+ return skb;
+}
+
+static u64 nicvf_int_type_to_mask(int int_type, int q_idx)
+{
+ u64 reg_val;
+
+ switch (int_type) {
+ case NICVF_INTR_CQ:
+ reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
+ break;
+ case NICVF_INTR_SQ:
+ reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
+ break;
+ case NICVF_INTR_RBDR:
+ reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
+ break;
+ case NICVF_INTR_PKT_DROP:
+ reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
+ break;
+ case NICVF_INTR_TCP_TIMER:
+ reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
+ break;
+ case NICVF_INTR_MBOX:
+ reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT);
+ break;
+ case NICVF_INTR_QS_ERR:
+ reg_val = (1ULL << NICVF_INTR_QS_ERR_SHIFT);
+ break;
+ default:
+ reg_val = 0;
+ }
+
+ return reg_val;
+}
+
+/* Enable interrupt */
+void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
+{
+ u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
+
+ if (!mask) {
+ netdev_dbg(nic->netdev,
+ "Failed to enable interrupt: unknown type\n");
+ return;
+ }
+ nicvf_reg_write(nic, NIC_VF_ENA_W1S,
+ nicvf_reg_read(nic, NIC_VF_ENA_W1S) | mask);
+}
+
+/* Disable interrupt */
+void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
+{
+ u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
+
+ if (!mask) {
+ netdev_dbg(nic->netdev,
+ "Failed to disable interrupt: unknown type\n");
+ return;
+ }
+
+ nicvf_reg_write(nic, NIC_VF_ENA_W1C, mask);
+}
+
+/* Clear interrupt */
+void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
+{
+ u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
+
+ if (!mask) {
+ netdev_dbg(nic->netdev,
+ "Failed to clear interrupt: unknown type\n");
+ return;
+ }
+
+ nicvf_reg_write(nic, NIC_VF_INT, mask);
+}
+
+/* Check if interrupt is enabled */
+int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
+{
+ u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
+ /* If interrupt type is unknown, we treat it disabled. */
+ if (!mask) {
+ netdev_dbg(nic->netdev,
+ "Failed to check interrupt enable: unknown type\n");
+ return 0;
+ }
+
+ return mask & nicvf_reg_read(nic, NIC_VF_ENA_W1S);
+}
+
+void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
+{
+ struct rcv_queue *rq;
+
+#define GET_RQ_STATS(reg) \
+ nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
+ (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
+
+ rq = &nic->qs->rq[rq_idx];
+ rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
+ rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
+}
+
+void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
+{
+ struct snd_queue *sq;
+
+#define GET_SQ_STATS(reg) \
+ nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
+ (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
+
+ sq = &nic->qs->sq[sq_idx];
+ sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
+ sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
+}
+
+/* Check for errors in the receive cmp.queue entry */
+int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
+{
+ netif_err(nic, rx_err, nic->netdev,
+ "RX error CQE err_level 0x%x err_opcode 0x%x\n",
+ cqe_rx->err_level, cqe_rx->err_opcode);
+
+ switch (cqe_rx->err_opcode) {
+ case CQ_RX_ERROP_RE_PARTIAL:
+ this_cpu_inc(nic->drv_stats->rx_bgx_truncated_pkts);
+ break;
+ case CQ_RX_ERROP_RE_JABBER:
+ this_cpu_inc(nic->drv_stats->rx_jabber_errs);
+ break;
+ case CQ_RX_ERROP_RE_FCS:
+ this_cpu_inc(nic->drv_stats->rx_fcs_errs);
+ break;
+ case CQ_RX_ERROP_RE_RX_CTL:
+ this_cpu_inc(nic->drv_stats->rx_bgx_errs);
+ break;
+ case CQ_RX_ERROP_PREL2_ERR:
+ this_cpu_inc(nic->drv_stats->rx_prel2_errs);
+ break;
+ case CQ_RX_ERROP_L2_MAL:
+ this_cpu_inc(nic->drv_stats->rx_l2_hdr_malformed);
+ break;
+ case CQ_RX_ERROP_L2_OVERSIZE:
+ this_cpu_inc(nic->drv_stats->rx_oversize);
+ break;
+ case CQ_RX_ERROP_L2_UNDERSIZE:
+ this_cpu_inc(nic->drv_stats->rx_undersize);
+ break;
+ case CQ_RX_ERROP_L2_LENMISM:
+ this_cpu_inc(nic->drv_stats->rx_l2_len_mismatch);
+ break;
+ case CQ_RX_ERROP_L2_PCLP:
+ this_cpu_inc(nic->drv_stats->rx_l2_pclp);
+ break;
+ case CQ_RX_ERROP_IP_NOT:
+ this_cpu_inc(nic->drv_stats->rx_ip_ver_errs);
+ break;
+ case CQ_RX_ERROP_IP_CSUM_ERR:
+ this_cpu_inc(nic->drv_stats->rx_ip_csum_errs);
+ break;
+ case CQ_RX_ERROP_IP_MAL:
+ this_cpu_inc(nic->drv_stats->rx_ip_hdr_malformed);
+ break;
+ case CQ_RX_ERROP_IP_MALD:
+ this_cpu_inc(nic->drv_stats->rx_ip_payload_malformed);
+ break;
+ case CQ_RX_ERROP_IP_HOP:
+ this_cpu_inc(nic->drv_stats->rx_ip_ttl_errs);
+ break;
+ case CQ_RX_ERROP_L3_PCLP:
+ this_cpu_inc(nic->drv_stats->rx_l3_pclp);
+ break;
+ case CQ_RX_ERROP_L4_MAL:
+ this_cpu_inc(nic->drv_stats->rx_l4_malformed);
+ break;
+ case CQ_RX_ERROP_L4_CHK:
+ this_cpu_inc(nic->drv_stats->rx_l4_csum_errs);
+ break;
+ case CQ_RX_ERROP_UDP_LEN:
+ this_cpu_inc(nic->drv_stats->rx_udp_len_errs);
+ break;
+ case CQ_RX_ERROP_L4_PORT:
+ this_cpu_inc(nic->drv_stats->rx_l4_port_errs);
+ break;
+ case CQ_RX_ERROP_TCP_FLAG:
+ this_cpu_inc(nic->drv_stats->rx_tcp_flag_errs);
+ break;
+ case CQ_RX_ERROP_TCP_OFFSET:
+ this_cpu_inc(nic->drv_stats->rx_tcp_offset_errs);
+ break;
+ case CQ_RX_ERROP_L4_PCLP:
+ this_cpu_inc(nic->drv_stats->rx_l4_pclp);
+ break;
+ case CQ_RX_ERROP_RBDR_TRUNC:
+ this_cpu_inc(nic->drv_stats->rx_truncated_pkts);
+ break;
+ }
+
+ return 1;
+}
+
+/* Check for errors in the send cmp.queue entry */
+int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx)
+{
+ switch (cqe_tx->send_status) {
+ case CQ_TX_ERROP_DESC_FAULT:
+ this_cpu_inc(nic->drv_stats->tx_desc_fault);
+ break;
+ case CQ_TX_ERROP_HDR_CONS_ERR:
+ this_cpu_inc(nic->drv_stats->tx_hdr_cons_err);
+ break;
+ case CQ_TX_ERROP_SUBDC_ERR:
+ this_cpu_inc(nic->drv_stats->tx_subdesc_err);
+ break;
+ case CQ_TX_ERROP_MAX_SIZE_VIOL:
+ this_cpu_inc(nic->drv_stats->tx_max_size_exceeded);
+ break;
+ case CQ_TX_ERROP_IMM_SIZE_OFLOW:
+ this_cpu_inc(nic->drv_stats->tx_imm_size_oflow);
+ break;
+ case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
+ this_cpu_inc(nic->drv_stats->tx_data_seq_err);
+ break;
+ case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
+ this_cpu_inc(nic->drv_stats->tx_mem_seq_err);
+ break;
+ case CQ_TX_ERROP_LOCK_VIOL:
+ this_cpu_inc(nic->drv_stats->tx_lock_viol);
+ break;
+ case CQ_TX_ERROP_DATA_FAULT:
+ this_cpu_inc(nic->drv_stats->tx_data_fault);
+ break;
+ case CQ_TX_ERROP_TSTMP_CONFLICT:
+ this_cpu_inc(nic->drv_stats->tx_tstmp_conflict);
+ break;
+ case CQ_TX_ERROP_TSTMP_TIMEOUT:
+ this_cpu_inc(nic->drv_stats->tx_tstmp_timeout);
+ break;
+ case CQ_TX_ERROP_MEM_FAULT:
+ this_cpu_inc(nic->drv_stats->tx_mem_fault);
+ break;
+ case CQ_TX_ERROP_CK_OVERLAP:
+ this_cpu_inc(nic->drv_stats->tx_csum_overlap);
+ break;
+ case CQ_TX_ERROP_CK_OFLOW:
+ this_cpu_inc(nic->drv_stats->tx_csum_overflow);
+ break;
+ }
+
+ return 1;
+}
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
new file mode 100644
index 000000000..5e9a03cf1
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -0,0 +1,377 @@
+/*
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef NICVF_QUEUES_H
+#define NICVF_QUEUES_H
+
+#include <linux/netdevice.h>
+#include <linux/iommu.h>
+#include <net/xdp.h>
+#include "q_struct.h"
+
+#define MAX_QUEUE_SET 128
+#define MAX_RCV_QUEUES_PER_QS 8
+#define MAX_RCV_BUF_DESC_RINGS_PER_QS 2
+#define MAX_SND_QUEUES_PER_QS 8
+#define MAX_CMP_QUEUES_PER_QS 8
+
+/* VF's queue interrupt ranges */
+#define NICVF_INTR_ID_CQ 0
+#define NICVF_INTR_ID_SQ 8
+#define NICVF_INTR_ID_RBDR 16
+#define NICVF_INTR_ID_MISC 18
+#define NICVF_INTR_ID_QS_ERR 19
+
+#define for_each_cq_irq(irq) \
+ for (irq = NICVF_INTR_ID_CQ; irq < NICVF_INTR_ID_SQ; irq++)
+#define for_each_sq_irq(irq) \
+ for (irq = NICVF_INTR_ID_SQ; irq < NICVF_INTR_ID_RBDR; irq++)
+#define for_each_rbdr_irq(irq) \
+ for (irq = NICVF_INTR_ID_RBDR; irq < NICVF_INTR_ID_MISC; irq++)
+
+#define RBDR_SIZE0 0ULL /* 8K entries */
+#define RBDR_SIZE1 1ULL /* 16K entries */
+#define RBDR_SIZE2 2ULL /* 32K entries */
+#define RBDR_SIZE3 3ULL /* 64K entries */
+#define RBDR_SIZE4 4ULL /* 126K entries */
+#define RBDR_SIZE5 5ULL /* 256K entries */
+#define RBDR_SIZE6 6ULL /* 512K entries */
+
+#define SND_QUEUE_SIZE0 0ULL /* 1K entries */
+#define SND_QUEUE_SIZE1 1ULL /* 2K entries */
+#define SND_QUEUE_SIZE2 2ULL /* 4K entries */
+#define SND_QUEUE_SIZE3 3ULL /* 8K entries */
+#define SND_QUEUE_SIZE4 4ULL /* 16K entries */
+#define SND_QUEUE_SIZE5 5ULL /* 32K entries */
+#define SND_QUEUE_SIZE6 6ULL /* 64K entries */
+
+#define CMP_QUEUE_SIZE0 0ULL /* 1K entries */
+#define CMP_QUEUE_SIZE1 1ULL /* 2K entries */
+#define CMP_QUEUE_SIZE2 2ULL /* 4K entries */
+#define CMP_QUEUE_SIZE3 3ULL /* 8K entries */
+#define CMP_QUEUE_SIZE4 4ULL /* 16K entries */
+#define CMP_QUEUE_SIZE5 5ULL /* 32K entries */
+#define CMP_QUEUE_SIZE6 6ULL /* 64K entries */
+
+/* Default queue count per QS, its lengths and threshold values */
+#define DEFAULT_RBDR_CNT 1
+
+#define SND_QSIZE SND_QUEUE_SIZE0
+#define SND_QUEUE_LEN (1ULL << (SND_QSIZE + 10))
+#define MIN_SND_QUEUE_LEN (1ULL << (SND_QUEUE_SIZE0 + 10))
+#define MAX_SND_QUEUE_LEN (1ULL << (SND_QUEUE_SIZE6 + 10))
+#define SND_QUEUE_THRESH 2ULL
+#define MIN_SQ_DESC_PER_PKT_XMIT 2
+/* Since timestamp not enabled, otherwise 2 */
+#define MAX_CQE_PER_PKT_XMIT 1
+
+/* Keep CQ and SQ sizes same, if timestamping
+ * is enabled this equation will change.
+ */
+#define CMP_QSIZE CMP_QUEUE_SIZE0
+#define CMP_QUEUE_LEN (1ULL << (CMP_QSIZE + 10))
+#define MIN_CMP_QUEUE_LEN (1ULL << (CMP_QUEUE_SIZE0 + 10))
+#define MAX_CMP_QUEUE_LEN (1ULL << (CMP_QUEUE_SIZE6 + 10))
+#define CMP_QUEUE_CQE_THRESH (NAPI_POLL_WEIGHT / 2)
+#define CMP_QUEUE_TIMER_THRESH 80 /* ~2usec */
+
+/* No of CQEs that might anyway gets used by HW due to pipelining
+ * effects irrespective of PASS/DROP/LEVELS being configured
+ */
+#define CMP_QUEUE_PIPELINE_RSVD 544
+
+#define RBDR_SIZE RBDR_SIZE0
+#define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13))
+#define MAX_RCV_BUF_COUNT (1ULL << (RBDR_SIZE6 + 13))
+#define RBDR_THRESH (RCV_BUF_COUNT / 2)
+#define DMA_BUFFER_LEN 1536 /* In multiples of 128bytes */
+#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
+#define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
+ MAX_CQE_PER_PKT_XMIT)
+
+/* RED and Backpressure levels of CQ for pkt reception
+ * For CQ, level is a measure of emptiness i.e 0x0 means full
+ * eg: For CQ of size 4K, and for pass/drop levels of 160/144
+ * HW accepts pkt if unused CQE >= 2560
+ * RED accepts pkt if unused CQE < 2304 & >= 2560
+ * DROPs pkts if unused CQE < 2304
+ */
+#define RQ_PASS_CQ_LVL 192ULL
+#define RQ_DROP_CQ_LVL 184ULL
+
+/* RED and Backpressure levels of RBDR for pkt reception
+ * For RBDR, level is a measure of fullness i.e 0x0 means empty
+ * eg: For RBDR of size 8K, and for pass/drop levels of 4/0
+ * HW accepts pkt if unused RBs >= 256
+ * RED accepts pkt if unused RBs < 256 & >= 0
+ * DROPs pkts if unused RBs < 0
+ */
+#define RQ_PASS_RBDR_LVL 8ULL
+#define RQ_DROP_RBDR_LVL 0ULL
+
+/* Descriptor size in bytes */
+#define SND_QUEUE_DESC_SIZE 16
+#define CMP_QUEUE_DESC_SIZE 512
+
+/* Buffer / descriptor alignments */
+#define NICVF_RCV_BUF_ALIGN 7
+#define NICVF_RCV_BUF_ALIGN_BYTES (1ULL << NICVF_RCV_BUF_ALIGN)
+#define NICVF_CQ_BASE_ALIGN_BYTES 512 /* 9 bits */
+#define NICVF_SQ_BASE_ALIGN_BYTES 128 /* 7 bits */
+
+#define NICVF_ALIGNED_ADDR(ADDR, ALIGN_BYTES) ALIGN(ADDR, ALIGN_BYTES)
+
+/* Queue enable/disable */
+#define NICVF_SQ_EN BIT_ULL(19)
+
+/* Queue reset */
+#define NICVF_CQ_RESET BIT_ULL(41)
+#define NICVF_SQ_RESET BIT_ULL(17)
+#define NICVF_RBDR_RESET BIT_ULL(43)
+
+enum CQ_RX_ERRLVL_E {
+ CQ_ERRLVL_MAC,
+ CQ_ERRLVL_L2,
+ CQ_ERRLVL_L3,
+ CQ_ERRLVL_L4,
+};
+
+enum CQ_RX_ERROP_E {
+ CQ_RX_ERROP_RE_NONE = 0x0,
+ CQ_RX_ERROP_RE_PARTIAL = 0x1,
+ CQ_RX_ERROP_RE_JABBER = 0x2,
+ CQ_RX_ERROP_RE_FCS = 0x7,
+ CQ_RX_ERROP_RE_TERMINATE = 0x9,
+ CQ_RX_ERROP_RE_RX_CTL = 0xb,
+ CQ_RX_ERROP_PREL2_ERR = 0x1f,
+ CQ_RX_ERROP_L2_FRAGMENT = 0x20,
+ CQ_RX_ERROP_L2_OVERRUN = 0x21,
+ CQ_RX_ERROP_L2_PFCS = 0x22,
+ CQ_RX_ERROP_L2_PUNY = 0x23,
+ CQ_RX_ERROP_L2_MAL = 0x24,
+ CQ_RX_ERROP_L2_OVERSIZE = 0x25,
+ CQ_RX_ERROP_L2_UNDERSIZE = 0x26,
+ CQ_RX_ERROP_L2_LENMISM = 0x27,
+ CQ_RX_ERROP_L2_PCLP = 0x28,
+ CQ_RX_ERROP_IP_NOT = 0x41,
+ CQ_RX_ERROP_IP_CSUM_ERR = 0x42,
+ CQ_RX_ERROP_IP_MAL = 0x43,
+ CQ_RX_ERROP_IP_MALD = 0x44,
+ CQ_RX_ERROP_IP_HOP = 0x45,
+ CQ_RX_ERROP_L3_ICRC = 0x46,
+ CQ_RX_ERROP_L3_PCLP = 0x47,
+ CQ_RX_ERROP_L4_MAL = 0x61,
+ CQ_RX_ERROP_L4_CHK = 0x62,
+ CQ_RX_ERROP_UDP_LEN = 0x63,
+ CQ_RX_ERROP_L4_PORT = 0x64,
+ CQ_RX_ERROP_TCP_FLAG = 0x65,
+ CQ_RX_ERROP_TCP_OFFSET = 0x66,
+ CQ_RX_ERROP_L4_PCLP = 0x67,
+ CQ_RX_ERROP_RBDR_TRUNC = 0x70,
+};
+
+enum CQ_TX_ERROP_E {
+ CQ_TX_ERROP_GOOD = 0x0,
+ CQ_TX_ERROP_DESC_FAULT = 0x10,
+ CQ_TX_ERROP_HDR_CONS_ERR = 0x11,
+ CQ_TX_ERROP_SUBDC_ERR = 0x12,
+ CQ_TX_ERROP_MAX_SIZE_VIOL = 0x13,
+ CQ_TX_ERROP_IMM_SIZE_OFLOW = 0x80,
+ CQ_TX_ERROP_DATA_SEQUENCE_ERR = 0x81,
+ CQ_TX_ERROP_MEM_SEQUENCE_ERR = 0x82,
+ CQ_TX_ERROP_LOCK_VIOL = 0x83,
+ CQ_TX_ERROP_DATA_FAULT = 0x84,
+ CQ_TX_ERROP_TSTMP_CONFLICT = 0x85,
+ CQ_TX_ERROP_TSTMP_TIMEOUT = 0x86,
+ CQ_TX_ERROP_MEM_FAULT = 0x87,
+ CQ_TX_ERROP_CK_OVERLAP = 0x88,
+ CQ_TX_ERROP_CK_OFLOW = 0x89,
+ CQ_TX_ERROP_ENUM_LAST = 0x8a,
+};
+
+enum RQ_SQ_STATS {
+ RQ_SQ_STATS_OCTS,
+ RQ_SQ_STATS_PKTS,
+};
+
+struct rx_tx_queue_stats {
+ u64 bytes;
+ u64 pkts;
+} ____cacheline_aligned_in_smp;
+
+struct q_desc_mem {
+ dma_addr_t dma;
+ u64 size;
+ u32 q_len;
+ dma_addr_t phys_base;
+ void *base;
+ void *unalign_base;
+};
+
+struct pgcache {
+ struct page *page;
+ int ref_count;
+ u64 dma_addr;
+};
+
+struct rbdr {
+ bool enable;
+ u32 dma_size;
+ u32 frag_len;
+ u32 thresh; /* Threshold level for interrupt */
+ void *desc;
+ u32 head;
+ u32 tail;
+ struct q_desc_mem dmem;
+ bool is_xdp;
+
+ /* For page recycling */
+ int pgidx;
+ int pgcnt;
+ int pgalloc;
+ struct pgcache *pgcache;
+} ____cacheline_aligned_in_smp;
+
+struct rcv_queue {
+ bool enable;
+ struct rbdr *rbdr_start;
+ struct rbdr *rbdr_cont;
+ bool en_tcp_reassembly;
+ u8 cq_qs; /* CQ's QS to which this RQ is assigned */
+ u8 cq_idx; /* CQ index (0 to 7) in the QS */
+ u8 cont_rbdr_qs; /* Continue buffer ptrs - QS num */
+ u8 cont_qs_rbdr_idx; /* RBDR idx in the cont QS */
+ u8 start_rbdr_qs; /* First buffer ptrs - QS num */
+ u8 start_qs_rbdr_idx; /* RBDR idx in the above QS */
+ u8 caching;
+ struct rx_tx_queue_stats stats;
+ struct xdp_rxq_info xdp_rxq;
+} ____cacheline_aligned_in_smp;
+
+struct cmp_queue {
+ bool enable;
+ u16 thresh;
+ spinlock_t lock; /* lock to serialize processing CQEs */
+ void *desc;
+ struct q_desc_mem dmem;
+ int irq;
+} ____cacheline_aligned_in_smp;
+
+struct snd_queue {
+ bool enable;
+ u8 cq_qs; /* CQ's QS to which this SQ is pointing */
+ u8 cq_idx; /* CQ index (0 to 7) in the above QS */
+ u16 thresh;
+ atomic_t free_cnt;
+ u32 head;
+ u32 tail;
+ u64 *skbuff;
+ void *desc;
+ u64 *xdp_page;
+ u16 xdp_desc_cnt;
+ u16 xdp_free_cnt;
+ bool is_xdp;
+
+ /* For TSO segment's header */
+ char *tso_hdrs;
+ dma_addr_t tso_hdrs_phys;
+
+ cpumask_t affinity_mask;
+ struct q_desc_mem dmem;
+ struct rx_tx_queue_stats stats;
+} ____cacheline_aligned_in_smp;
+
+struct queue_set {
+ bool enable;
+ bool be_en;
+ u8 vnic_id;
+ u8 rq_cnt;
+ u8 cq_cnt;
+ u64 cq_len;
+ u8 sq_cnt;
+ u64 sq_len;
+ u8 rbdr_cnt;
+ u64 rbdr_len;
+ struct rcv_queue rq[MAX_RCV_QUEUES_PER_QS];
+ struct cmp_queue cq[MAX_CMP_QUEUES_PER_QS];
+ struct snd_queue sq[MAX_SND_QUEUES_PER_QS];
+ struct rbdr rbdr[MAX_RCV_BUF_DESC_RINGS_PER_QS];
+} ____cacheline_aligned_in_smp;
+
+#define GET_RBDR_DESC(RING, idx)\
+ (&(((struct rbdr_entry_t *)((RING)->desc))[idx]))
+#define GET_SQ_DESC(RING, idx)\
+ (&(((struct sq_hdr_subdesc *)((RING)->desc))[idx]))
+#define GET_CQ_DESC(RING, idx)\
+ (&(((union cq_desc_t *)((RING)->desc))[idx]))
+
+/* CQ status bits */
+#define CQ_WR_FULL BIT(26)
+#define CQ_WR_DISABLE BIT(25)
+#define CQ_WR_FAULT BIT(24)
+#define CQ_CQE_COUNT (0xFFFF << 0)
+
+#define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT)
+
+static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr)
+{
+ /* Translation is installed only when IOMMU is present */
+ if (nic->iommu_domain)
+ return iommu_iova_to_phys(nic->iommu_domain, dma_addr);
+ return dma_addr;
+}
+
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+ int hdr_sqe, u8 subdesc_cnt);
+void nicvf_config_vlan_stripping(struct nicvf *nic,
+ netdev_features_t features);
+int nicvf_set_qset_resources(struct nicvf *nic);
+int nicvf_config_data_transfer(struct nicvf *nic, bool enable);
+void nicvf_qset_config(struct nicvf *nic, bool enable);
+void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
+ int qidx, bool enable);
+
+void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx);
+void nicvf_sq_disable(struct nicvf *nic, int qidx);
+void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt);
+void nicvf_sq_free_used_descs(struct net_device *netdev,
+ struct snd_queue *sq, int qidx);
+int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
+ struct sk_buff *skb, u8 sq_num);
+int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
+ u64 bufaddr, u64 dma_addr, u16 len);
+void nicvf_xdp_sq_doorbell(struct nicvf *nic, struct snd_queue *sq, int sq_num);
+
+struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic,
+ struct cqe_rx_t *cqe_rx, bool xdp);
+void nicvf_rbdr_task(unsigned long data);
+void nicvf_rbdr_work(struct work_struct *work);
+
+void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx);
+void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx);
+void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx);
+int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx);
+
+/* Register access APIs */
+void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val);
+u64 nicvf_reg_read(struct nicvf *nic, u64 offset);
+void nicvf_qset_reg_write(struct nicvf *nic, u64 offset, u64 val);
+u64 nicvf_qset_reg_read(struct nicvf *nic, u64 offset);
+void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
+ u64 qidx, u64 val);
+u64 nicvf_queue_reg_read(struct nicvf *nic,
+ u64 offset, u64 qidx);
+
+/* Stats */
+void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx);
+void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx);
+int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx);
+int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx);
+#endif /* NICVF_QUEUES_H */
diff --git a/drivers/net/ethernet/cavium/thunder/q_struct.h b/drivers/net/ethernet/cavium/thunder/q_struct.h
new file mode 100644
index 000000000..e47205aa8
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/q_struct.h
@@ -0,0 +1,699 @@
+/*
+ * This file contains HW queue descriptor formats, config register
+ * structures etc
+ *
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef Q_STRUCT_H
+#define Q_STRUCT_H
+
+/* Load transaction types for reading segment bytes specified by
+ * NIC_SEND_GATHER_S[LD_TYPE].
+ */
+enum nic_send_ld_type_e {
+ NIC_SEND_LD_TYPE_E_LDD = 0x0,
+ NIC_SEND_LD_TYPE_E_LDT = 0x1,
+ NIC_SEND_LD_TYPE_E_LDWB = 0x2,
+ NIC_SEND_LD_TYPE_E_ENUM_LAST = 0x3,
+};
+
+enum ether_type_algorithm {
+ ETYPE_ALG_NONE = 0x0,
+ ETYPE_ALG_SKIP = 0x1,
+ ETYPE_ALG_ENDPARSE = 0x2,
+ ETYPE_ALG_VLAN = 0x3,
+ ETYPE_ALG_VLAN_STRIP = 0x4,
+};
+
+enum layer3_type {
+ L3TYPE_NONE = 0x00,
+ L3TYPE_GRH = 0x01,
+ L3TYPE_IPV4 = 0x04,
+ L3TYPE_IPV4_OPTIONS = 0x05,
+ L3TYPE_IPV6 = 0x06,
+ L3TYPE_IPV6_OPTIONS = 0x07,
+ L3TYPE_ET_STOP = 0x0D,
+ L3TYPE_OTHER = 0x0E,
+};
+
+enum layer4_type {
+ L4TYPE_NONE = 0x00,
+ L4TYPE_IPSEC_ESP = 0x01,
+ L4TYPE_IPFRAG = 0x02,
+ L4TYPE_IPCOMP = 0x03,
+ L4TYPE_TCP = 0x04,
+ L4TYPE_UDP = 0x05,
+ L4TYPE_SCTP = 0x06,
+ L4TYPE_GRE = 0x07,
+ L4TYPE_ROCE_BTH = 0x08,
+ L4TYPE_OTHER = 0x0E,
+};
+
+/* CPI and RSSI configuration */
+enum cpi_algorithm_type {
+ CPI_ALG_NONE = 0x0,
+ CPI_ALG_VLAN = 0x1,
+ CPI_ALG_VLAN16 = 0x2,
+ CPI_ALG_DIFF = 0x3,
+};
+
+enum rss_algorithm_type {
+ RSS_ALG_NONE = 0x00,
+ RSS_ALG_PORT = 0x01,
+ RSS_ALG_IP = 0x02,
+ RSS_ALG_TCP_IP = 0x03,
+ RSS_ALG_UDP_IP = 0x04,
+ RSS_ALG_SCTP_IP = 0x05,
+ RSS_ALG_GRE_IP = 0x06,
+ RSS_ALG_ROCE = 0x07,
+};
+
+enum rss_hash_cfg {
+ RSS_HASH_L2ETC = 0x00,
+ RSS_HASH_IP = 0x01,
+ RSS_HASH_TCP = 0x02,
+ RSS_HASH_TCP_SYN_DIS = 0x03,
+ RSS_HASH_UDP = 0x04,
+ RSS_HASH_L4ETC = 0x05,
+ RSS_HASH_ROCE = 0x06,
+ RSS_L3_BIDI = 0x07,
+ RSS_L4_BIDI = 0x08,
+};
+
+/* Completion queue entry types */
+enum cqe_type {
+ CQE_TYPE_INVALID = 0x0,
+ CQE_TYPE_RX = 0x2,
+ CQE_TYPE_RX_SPLIT = 0x3,
+ CQE_TYPE_RX_TCP = 0x4,
+ CQE_TYPE_SEND = 0x8,
+ CQE_TYPE_SEND_PTP = 0x9,
+};
+
+enum cqe_rx_tcp_status {
+ CQE_RX_STATUS_VALID_TCP_CNXT = 0x00,
+ CQE_RX_STATUS_INVALID_TCP_CNXT = 0x0F,
+};
+
+enum cqe_send_status {
+ CQE_SEND_STATUS_GOOD = 0x00,
+ CQE_SEND_STATUS_DESC_FAULT = 0x01,
+ CQE_SEND_STATUS_HDR_CONS_ERR = 0x11,
+ CQE_SEND_STATUS_SUBDESC_ERR = 0x12,
+ CQE_SEND_STATUS_IMM_SIZE_OFLOW = 0x80,
+ CQE_SEND_STATUS_CRC_SEQ_ERR = 0x81,
+ CQE_SEND_STATUS_DATA_SEQ_ERR = 0x82,
+ CQE_SEND_STATUS_MEM_SEQ_ERR = 0x83,
+ CQE_SEND_STATUS_LOCK_VIOL = 0x84,
+ CQE_SEND_STATUS_LOCK_UFLOW = 0x85,
+ CQE_SEND_STATUS_DATA_FAULT = 0x86,
+ CQE_SEND_STATUS_TSTMP_CONFLICT = 0x87,
+ CQE_SEND_STATUS_TSTMP_TIMEOUT = 0x88,
+ CQE_SEND_STATUS_MEM_FAULT = 0x89,
+ CQE_SEND_STATUS_CSUM_OVERLAP = 0x8A,
+ CQE_SEND_STATUS_CSUM_OVERFLOW = 0x8B,
+};
+
+enum cqe_rx_tcp_end_reason {
+ CQE_RX_TCP_END_FIN_FLAG_DET = 0,
+ CQE_RX_TCP_END_INVALID_FLAG = 1,
+ CQE_RX_TCP_END_TIMEOUT = 2,
+ CQE_RX_TCP_END_OUT_OF_SEQ = 3,
+ CQE_RX_TCP_END_PKT_ERR = 4,
+ CQE_RX_TCP_END_QS_DISABLED = 0x0F,
+};
+
+/* Packet protocol level error enumeration */
+enum cqe_rx_err_level {
+ CQE_RX_ERRLVL_RE = 0x0,
+ CQE_RX_ERRLVL_L2 = 0x1,
+ CQE_RX_ERRLVL_L3 = 0x2,
+ CQE_RX_ERRLVL_L4 = 0x3,
+};
+
+/* Packet protocol level error type enumeration */
+enum cqe_rx_err_opcode {
+ CQE_RX_ERR_RE_NONE = 0x0,
+ CQE_RX_ERR_RE_PARTIAL = 0x1,
+ CQE_RX_ERR_RE_JABBER = 0x2,
+ CQE_RX_ERR_RE_FCS = 0x7,
+ CQE_RX_ERR_RE_TERMINATE = 0x9,
+ CQE_RX_ERR_RE_RX_CTL = 0xb,
+ CQE_RX_ERR_PREL2_ERR = 0x1f,
+ CQE_RX_ERR_L2_FRAGMENT = 0x20,
+ CQE_RX_ERR_L2_OVERRUN = 0x21,
+ CQE_RX_ERR_L2_PFCS = 0x22,
+ CQE_RX_ERR_L2_PUNY = 0x23,
+ CQE_RX_ERR_L2_MAL = 0x24,
+ CQE_RX_ERR_L2_OVERSIZE = 0x25,
+ CQE_RX_ERR_L2_UNDERSIZE = 0x26,
+ CQE_RX_ERR_L2_LENMISM = 0x27,
+ CQE_RX_ERR_L2_PCLP = 0x28,
+ CQE_RX_ERR_IP_NOT = 0x41,
+ CQE_RX_ERR_IP_CHK = 0x42,
+ CQE_RX_ERR_IP_MAL = 0x43,
+ CQE_RX_ERR_IP_MALD = 0x44,
+ CQE_RX_ERR_IP_HOP = 0x45,
+ CQE_RX_ERR_L3_ICRC = 0x46,
+ CQE_RX_ERR_L3_PCLP = 0x47,
+ CQE_RX_ERR_L4_MAL = 0x61,
+ CQE_RX_ERR_L4_CHK = 0x62,
+ CQE_RX_ERR_UDP_LEN = 0x63,
+ CQE_RX_ERR_L4_PORT = 0x64,
+ CQE_RX_ERR_TCP_FLAG = 0x65,
+ CQE_RX_ERR_TCP_OFFSET = 0x66,
+ CQE_RX_ERR_L4_PCLP = 0x67,
+ CQE_RX_ERR_RBDR_TRUNC = 0x70,
+};
+
+struct cqe_rx_t {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 cqe_type:4; /* W0 */
+ u64 stdn_fault:1;
+ u64 rsvd0:1;
+ u64 rq_qs:7;
+ u64 rq_idx:3;
+ u64 rsvd1:12;
+ u64 rss_alg:4;
+ u64 rsvd2:4;
+ u64 rb_cnt:4;
+ u64 vlan_found:1;
+ u64 vlan_stripped:1;
+ u64 vlan2_found:1;
+ u64 vlan2_stripped:1;
+ u64 l4_type:4;
+ u64 l3_type:4;
+ u64 l2_present:1;
+ u64 err_level:3;
+ u64 err_opcode:8;
+
+ u64 pkt_len:16; /* W1 */
+ u64 l2_ptr:8;
+ u64 l3_ptr:8;
+ u64 l4_ptr:8;
+ u64 cq_pkt_len:8;
+ u64 align_pad:3;
+ u64 rsvd3:1;
+ u64 chan:12;
+
+ u64 rss_tag:32; /* W2 */
+ u64 vlan_tci:16;
+ u64 vlan_ptr:8;
+ u64 vlan2_ptr:8;
+
+ u64 rb3_sz:16; /* W3 */
+ u64 rb2_sz:16;
+ u64 rb1_sz:16;
+ u64 rb0_sz:16;
+
+ u64 rb7_sz:16; /* W4 */
+ u64 rb6_sz:16;
+ u64 rb5_sz:16;
+ u64 rb4_sz:16;
+
+ u64 rb11_sz:16; /* W5 */
+ u64 rb10_sz:16;
+ u64 rb9_sz:16;
+ u64 rb8_sz:16;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 err_opcode:8;
+ u64 err_level:3;
+ u64 l2_present:1;
+ u64 l3_type:4;
+ u64 l4_type:4;
+ u64 vlan2_stripped:1;
+ u64 vlan2_found:1;
+ u64 vlan_stripped:1;
+ u64 vlan_found:1;
+ u64 rb_cnt:4;
+ u64 rsvd2:4;
+ u64 rss_alg:4;
+ u64 rsvd1:12;
+ u64 rq_idx:3;
+ u64 rq_qs:7;
+ u64 rsvd0:1;
+ u64 stdn_fault:1;
+ u64 cqe_type:4; /* W0 */
+ u64 chan:12;
+ u64 rsvd3:1;
+ u64 align_pad:3;
+ u64 cq_pkt_len:8;
+ u64 l4_ptr:8;
+ u64 l3_ptr:8;
+ u64 l2_ptr:8;
+ u64 pkt_len:16; /* W1 */
+ u64 vlan2_ptr:8;
+ u64 vlan_ptr:8;
+ u64 vlan_tci:16;
+ u64 rss_tag:32; /* W2 */
+ u64 rb0_sz:16;
+ u64 rb1_sz:16;
+ u64 rb2_sz:16;
+ u64 rb3_sz:16; /* W3 */
+ u64 rb4_sz:16;
+ u64 rb5_sz:16;
+ u64 rb6_sz:16;
+ u64 rb7_sz:16; /* W4 */
+ u64 rb8_sz:16;
+ u64 rb9_sz:16;
+ u64 rb10_sz:16;
+ u64 rb11_sz:16; /* W5 */
+#endif
+ u64 rb0_ptr:64;
+ u64 rb1_ptr:64;
+ u64 rb2_ptr:64;
+ u64 rb3_ptr:64;
+ u64 rb4_ptr:64;
+ u64 rb5_ptr:64;
+ u64 rb6_ptr:64;
+ u64 rb7_ptr:64;
+ u64 rb8_ptr:64;
+ u64 rb9_ptr:64;
+ u64 rb10_ptr:64;
+ u64 rb11_ptr:64;
+};
+
+struct cqe_rx_tcp_err_t {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 cqe_type:4; /* W0 */
+ u64 rsvd0:60;
+
+ u64 rsvd1:4; /* W1 */
+ u64 partial_first:1;
+ u64 rsvd2:27;
+ u64 rbdr_bytes:8;
+ u64 rsvd3:24;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 rsvd0:60;
+ u64 cqe_type:4;
+
+ u64 rsvd3:24;
+ u64 rbdr_bytes:8;
+ u64 rsvd2:27;
+ u64 partial_first:1;
+ u64 rsvd1:4;
+#endif
+};
+
+struct cqe_rx_tcp_t {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 cqe_type:4; /* W0 */
+ u64 rsvd0:52;
+ u64 cq_tcp_status:8;
+
+ u64 rsvd1:32; /* W1 */
+ u64 tcp_cntx_bytes:8;
+ u64 rsvd2:8;
+ u64 tcp_err_bytes:16;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 cq_tcp_status:8;
+ u64 rsvd0:52;
+ u64 cqe_type:4; /* W0 */
+
+ u64 tcp_err_bytes:16;
+ u64 rsvd2:8;
+ u64 tcp_cntx_bytes:8;
+ u64 rsvd1:32; /* W1 */
+#endif
+};
+
+struct cqe_send_t {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 cqe_type:4; /* W0 */
+ u64 rsvd0:4;
+ u64 sqe_ptr:16;
+ u64 rsvd1:4;
+ u64 rsvd2:10;
+ u64 sq_qs:7;
+ u64 sq_idx:3;
+ u64 rsvd3:8;
+ u64 send_status:8;
+
+ u64 ptp_timestamp:64; /* W1 */
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 send_status:8;
+ u64 rsvd3:8;
+ u64 sq_idx:3;
+ u64 sq_qs:7;
+ u64 rsvd2:10;
+ u64 rsvd1:4;
+ u64 sqe_ptr:16;
+ u64 rsvd0:4;
+ u64 cqe_type:4; /* W0 */
+
+ u64 ptp_timestamp:64; /* W1 */
+#endif
+};
+
+union cq_desc_t {
+ u64 u[64];
+ struct cqe_send_t snd_hdr;
+ struct cqe_rx_t rx_hdr;
+ struct cqe_rx_tcp_t rx_tcp_hdr;
+ struct cqe_rx_tcp_err_t rx_tcp_err_hdr;
+};
+
+struct rbdr_entry_t {
+ u64 buf_addr;
+};
+
+/* TCP reassembly context */
+struct rbe_tcp_cnxt_t {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 tcp_pkt_cnt:12;
+ u64 rsvd1:4;
+ u64 align_hdr_bytes:4;
+ u64 align_ptr_bytes:4;
+ u64 ptr_bytes:16;
+ u64 rsvd2:24;
+ u64 cqe_type:4;
+ u64 rsvd0:54;
+ u64 tcp_end_reason:2;
+ u64 tcp_status:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 tcp_status:4;
+ u64 tcp_end_reason:2;
+ u64 rsvd0:54;
+ u64 cqe_type:4;
+ u64 rsvd2:24;
+ u64 ptr_bytes:16;
+ u64 align_ptr_bytes:4;
+ u64 align_hdr_bytes:4;
+ u64 rsvd1:4;
+ u64 tcp_pkt_cnt:12;
+#endif
+};
+
+/* Always Big endian */
+struct rx_hdr_t {
+ u64 opaque:32;
+ u64 rss_flow:8;
+ u64 skip_length:6;
+ u64 disable_rss:1;
+ u64 disable_tcp_reassembly:1;
+ u64 nodrop:1;
+ u64 dest_alg:2;
+ u64 rsvd0:2;
+ u64 dest_rq:11;
+};
+
+enum send_l4_csum_type {
+ SEND_L4_CSUM_DISABLE = 0x00,
+ SEND_L4_CSUM_UDP = 0x01,
+ SEND_L4_CSUM_TCP = 0x02,
+ SEND_L4_CSUM_SCTP = 0x03,
+};
+
+enum send_crc_alg {
+ SEND_CRCALG_CRC32 = 0x00,
+ SEND_CRCALG_CRC32C = 0x01,
+ SEND_CRCALG_ICRC = 0x02,
+};
+
+enum send_load_type {
+ SEND_LD_TYPE_LDD = 0x00,
+ SEND_LD_TYPE_LDT = 0x01,
+ SEND_LD_TYPE_LDWB = 0x02,
+};
+
+enum send_mem_alg_type {
+ SEND_MEMALG_SET = 0x00,
+ SEND_MEMALG_ADD = 0x08,
+ SEND_MEMALG_SUB = 0x09,
+ SEND_MEMALG_ADDLEN = 0x0A,
+ SEND_MEMALG_SUBLEN = 0x0B,
+};
+
+enum send_mem_dsz_type {
+ SEND_MEMDSZ_B64 = 0x00,
+ SEND_MEMDSZ_B32 = 0x01,
+ SEND_MEMDSZ_B8 = 0x03,
+};
+
+enum sq_subdesc_type {
+ SQ_DESC_TYPE_INVALID = 0x00,
+ SQ_DESC_TYPE_HEADER = 0x01,
+ SQ_DESC_TYPE_CRC = 0x02,
+ SQ_DESC_TYPE_IMMEDIATE = 0x03,
+ SQ_DESC_TYPE_GATHER = 0x04,
+ SQ_DESC_TYPE_MEMORY = 0x05,
+};
+
+struct sq_crc_subdesc {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 rsvd1:32;
+ u64 crc_ival:32;
+ u64 subdesc_type:4;
+ u64 crc_alg:2;
+ u64 rsvd0:10;
+ u64 crc_insert_pos:16;
+ u64 hdr_start:16;
+ u64 crc_len:16;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 crc_len:16;
+ u64 hdr_start:16;
+ u64 crc_insert_pos:16;
+ u64 rsvd0:10;
+ u64 crc_alg:2;
+ u64 subdesc_type:4;
+ u64 crc_ival:32;
+ u64 rsvd1:32;
+#endif
+};
+
+struct sq_gather_subdesc {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 subdesc_type:4; /* W0 */
+ u64 ld_type:2;
+ u64 rsvd0:42;
+ u64 size:16;
+
+ u64 rsvd1:15; /* W1 */
+ u64 addr:49;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 size:16;
+ u64 rsvd0:42;
+ u64 ld_type:2;
+ u64 subdesc_type:4; /* W0 */
+
+ u64 addr:49;
+ u64 rsvd1:15; /* W1 */
+#endif
+};
+
+/* SQ immediate subdescriptor */
+struct sq_imm_subdesc {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 subdesc_type:4; /* W0 */
+ u64 rsvd0:46;
+ u64 len:14;
+
+ u64 data:64; /* W1 */
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 len:14;
+ u64 rsvd0:46;
+ u64 subdesc_type:4; /* W0 */
+
+ u64 data:64; /* W1 */
+#endif
+};
+
+struct sq_mem_subdesc {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 subdesc_type:4; /* W0 */
+ u64 mem_alg:4;
+ u64 mem_dsz:2;
+ u64 wmem:1;
+ u64 rsvd0:21;
+ u64 offset:32;
+
+ u64 rsvd1:15; /* W1 */
+ u64 addr:49;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 offset:32;
+ u64 rsvd0:21;
+ u64 wmem:1;
+ u64 mem_dsz:2;
+ u64 mem_alg:4;
+ u64 subdesc_type:4; /* W0 */
+
+ u64 addr:49;
+ u64 rsvd1:15; /* W1 */
+#endif
+};
+
+struct sq_hdr_subdesc {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 subdesc_type:4;
+ u64 tso:1;
+ u64 post_cqe:1; /* Post CQE on no error also */
+ u64 dont_send:1;
+ u64 tstmp:1;
+ u64 subdesc_cnt:8;
+ u64 csum_l4:2;
+ u64 csum_l3:1;
+ u64 csum_inner_l4:2;
+ u64 csum_inner_l3:1;
+ u64 rsvd0:2;
+ u64 l4_offset:8;
+ u64 l3_offset:8;
+ u64 rsvd1:4;
+ u64 tot_len:20; /* W0 */
+
+ u64 rsvd2:24;
+ u64 inner_l4_offset:8;
+ u64 inner_l3_offset:8;
+ u64 tso_start:8;
+ u64 rsvd3:2;
+ u64 tso_max_paysize:14; /* W1 */
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 tot_len:20;
+ u64 rsvd1:4;
+ u64 l3_offset:8;
+ u64 l4_offset:8;
+ u64 rsvd0:2;
+ u64 csum_inner_l3:1;
+ u64 csum_inner_l4:2;
+ u64 csum_l3:1;
+ u64 csum_l4:2;
+ u64 subdesc_cnt:8;
+ u64 tstmp:1;
+ u64 dont_send:1;
+ u64 post_cqe:1; /* Post CQE on no error also */
+ u64 tso:1;
+ u64 subdesc_type:4; /* W0 */
+
+ u64 tso_max_paysize:14;
+ u64 rsvd3:2;
+ u64 tso_start:8;
+ u64 inner_l3_offset:8;
+ u64 inner_l4_offset:8;
+ u64 rsvd2:24; /* W1 */
+#endif
+};
+
+/* Queue config register formats */
+struct rq_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 reserved_2_63:62;
+ u64 ena:1;
+ u64 tcp_ena:1;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 tcp_ena:1;
+ u64 ena:1;
+ u64 reserved_2_63:62;
+#endif
+};
+
+struct cq_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 reserved_43_63:21;
+ u64 ena:1;
+ u64 reset:1;
+ u64 caching:1;
+ u64 reserved_35_39:5;
+ u64 qsize:3;
+ u64 reserved_25_31:7;
+ u64 avg_con:9;
+ u64 reserved_0_15:16;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 reserved_0_15:16;
+ u64 avg_con:9;
+ u64 reserved_25_31:7;
+ u64 qsize:3;
+ u64 reserved_35_39:5;
+ u64 caching:1;
+ u64 reset:1;
+ u64 ena:1;
+ u64 reserved_43_63:21;
+#endif
+};
+
+struct sq_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 reserved_32_63:32;
+ u64 cq_limit:8;
+ u64 reserved_20_23:4;
+ u64 ena:1;
+ u64 reserved_18_18:1;
+ u64 reset:1;
+ u64 ldwb:1;
+ u64 reserved_11_15:5;
+ u64 qsize:3;
+ u64 reserved_3_7:5;
+ u64 tstmp_bgx_intf:3;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 tstmp_bgx_intf:3;
+ u64 reserved_3_7:5;
+ u64 qsize:3;
+ u64 reserved_11_15:5;
+ u64 ldwb:1;
+ u64 reset:1;
+ u64 reserved_18_18:1;
+ u64 ena:1;
+ u64 reserved_20_23:4;
+ u64 cq_limit:8;
+ u64 reserved_32_63:32;
+#endif
+};
+
+struct rbdr_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 reserved_45_63:19;
+ u64 ena:1;
+ u64 reset:1;
+ u64 ldwb:1;
+ u64 reserved_36_41:6;
+ u64 qsize:4;
+ u64 reserved_25_31:7;
+ u64 avg_con:9;
+ u64 reserved_12_15:4;
+ u64 lines:12;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 lines:12;
+ u64 reserved_12_15:4;
+ u64 avg_con:9;
+ u64 reserved_25_31:7;
+ u64 qsize:4;
+ u64 reserved_36_41:6;
+ u64 ldwb:1;
+ u64 reset:1;
+ u64 ena: 1;
+ u64 reserved_45_63:19;
+#endif
+};
+
+struct qs_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 reserved_32_63:32;
+ u64 ena:1;
+ u64 reserved_27_30:4;
+ u64 sq_ins_ena:1;
+ u64 sq_ins_pos:6;
+ u64 lock_ena:1;
+ u64 lock_viol_cqe_ena:1;
+ u64 send_tstmp_ena:1;
+ u64 be:1;
+ u64 reserved_7_15:9;
+ u64 vnic:7;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u64 vnic:7;
+ u64 reserved_7_15:9;
+ u64 be:1;
+ u64 send_tstmp_ena:1;
+ u64 lock_viol_cqe_ena:1;
+ u64 lock_ena:1;
+ u64 sq_ins_pos:6;
+ u64 sq_ins_ena:1;
+ u64 reserved_27_30:4;
+ u64 ena:1;
+ u64 reserved_32_63:32;
+#endif
+};
+
+#endif /* Q_STRUCT_H */
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
new file mode 100644
index 000000000..e5fc89813
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -0,0 +1,1731 @@
+/*
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+
+#include "nic_reg.h"
+#include "nic.h"
+#include "thunder_bgx.h"
+
+#define DRV_NAME "thunder_bgx"
+#define DRV_VERSION "1.0"
+
+/* RX_DMAC_CTL configuration */
+enum MCAST_MODE {
+ MCAST_MODE_REJECT = 0x0,
+ MCAST_MODE_ACCEPT = 0x1,
+ MCAST_MODE_CAM_FILTER = 0x2,
+ RSVD = 0x3
+};
+
+#define BCAST_ACCEPT BIT(0)
+#define CAM_ACCEPT BIT(3)
+#define MCAST_MODE_MASK 0x3
+#define BGX_MCAST_MODE(x) (x << 1)
+
+struct dmac_map {
+ u64 vf_map;
+ u64 dmac;
+};
+
+struct lmac {
+ struct bgx *bgx;
+ /* actual number of DMACs configured */
+ u8 dmacs_cfg;
+ /* overal number of possible DMACs could be configured per LMAC */
+ u8 dmacs_count;
+ struct dmac_map *dmacs; /* DMAC:VFs tracking filter array */
+ u8 mac[ETH_ALEN];
+ u8 lmac_type;
+ u8 lane_to_sds;
+ bool use_training;
+ bool autoneg;
+ bool link_up;
+ int lmacid; /* ID within BGX */
+ int lmacid_bd; /* ID on board */
+ struct net_device netdev;
+ struct phy_device *phydev;
+ unsigned int last_duplex;
+ unsigned int last_link;
+ unsigned int last_speed;
+ bool is_sgmii;
+ struct delayed_work dwork;
+ struct workqueue_struct *check_link;
+};
+
+struct bgx {
+ u8 bgx_id;
+ struct lmac lmac[MAX_LMAC_PER_BGX];
+ u8 lmac_count;
+ u8 max_lmac;
+ u8 acpi_lmac_idx;
+ void __iomem *reg_base;
+ struct pci_dev *pdev;
+ bool is_dlm;
+ bool is_rgx;
+};
+
+static struct bgx *bgx_vnic[MAX_BGX_THUNDER];
+static int lmac_count; /* Total no of LMACs in system */
+
+static int bgx_xaui_check_link(struct lmac *lmac);
+
+/* Supported devices */
+static const struct pci_device_id bgx_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_BGX) },
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_RGX) },
+ { 0, } /* end of table */
+};
+
+MODULE_AUTHOR("Cavium Inc");
+MODULE_DESCRIPTION("Cavium Thunder BGX/MAC Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, bgx_id_table);
+
+/* The Cavium ThunderX network controller can *only* be found in SoCs
+ * containing the ThunderX ARM64 CPU implementation. All accesses to the device
+ * registers on this platform are implicitly strongly ordered with respect
+ * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
+ * with no memory barriers in this driver. The readq()/writeq() functions add
+ * explicit ordering operation which in this case are redundant, and only
+ * add overhead.
+ */
+
+/* Register read/write APIs */
+static u64 bgx_reg_read(struct bgx *bgx, u8 lmac, u64 offset)
+{
+ void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset;
+
+ return readq_relaxed(addr);
+}
+
+static void bgx_reg_write(struct bgx *bgx, u8 lmac, u64 offset, u64 val)
+{
+ void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset;
+
+ writeq_relaxed(val, addr);
+}
+
+static void bgx_reg_modify(struct bgx *bgx, u8 lmac, u64 offset, u64 val)
+{
+ void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset;
+
+ writeq_relaxed(val | readq_relaxed(addr), addr);
+}
+
+static int bgx_poll_reg(struct bgx *bgx, u8 lmac, u64 reg, u64 mask, bool zero)
+{
+ int timeout = 100;
+ u64 reg_val;
+
+ while (timeout) {
+ reg_val = bgx_reg_read(bgx, lmac, reg);
+ if (zero && !(reg_val & mask))
+ return 0;
+ if (!zero && (reg_val & mask))
+ return 0;
+ usleep_range(1000, 2000);
+ timeout--;
+ }
+ return 1;
+}
+
+static int max_bgx_per_node;
+static void set_max_bgx_per_node(struct pci_dev *pdev)
+{
+ u16 sdevid;
+
+ if (max_bgx_per_node)
+ return;
+
+ pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
+ switch (sdevid) {
+ case PCI_SUBSYS_DEVID_81XX_BGX:
+ case PCI_SUBSYS_DEVID_81XX_RGX:
+ max_bgx_per_node = MAX_BGX_PER_CN81XX;
+ break;
+ case PCI_SUBSYS_DEVID_83XX_BGX:
+ max_bgx_per_node = MAX_BGX_PER_CN83XX;
+ break;
+ case PCI_SUBSYS_DEVID_88XX_BGX:
+ default:
+ max_bgx_per_node = MAX_BGX_PER_CN88XX;
+ break;
+ }
+}
+
+static struct bgx *get_bgx(int node, int bgx_idx)
+{
+ int idx = (node * max_bgx_per_node) + bgx_idx;
+
+ return bgx_vnic[idx];
+}
+
+/* Return number of BGX present in HW */
+unsigned bgx_get_map(int node)
+{
+ int i;
+ unsigned map = 0;
+
+ for (i = 0; i < max_bgx_per_node; i++) {
+ if (bgx_vnic[(node * max_bgx_per_node) + i])
+ map |= (1 << i);
+ }
+
+ return map;
+}
+EXPORT_SYMBOL(bgx_get_map);
+
+/* Return number of LMAC configured for this BGX */
+int bgx_get_lmac_count(int node, int bgx_idx)
+{
+ struct bgx *bgx;
+
+ bgx = get_bgx(node, bgx_idx);
+ if (bgx)
+ return bgx->lmac_count;
+
+ return 0;
+}
+EXPORT_SYMBOL(bgx_get_lmac_count);
+
+/* Returns the current link status of LMAC */
+void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status)
+{
+ struct bgx_link_status *link = (struct bgx_link_status *)status;
+ struct bgx *bgx;
+ struct lmac *lmac;
+
+ bgx = get_bgx(node, bgx_idx);
+ if (!bgx)
+ return;
+
+ lmac = &bgx->lmac[lmacid];
+ link->mac_type = lmac->lmac_type;
+ link->link_up = lmac->link_up;
+ link->duplex = lmac->last_duplex;
+ link->speed = lmac->last_speed;
+}
+EXPORT_SYMBOL(bgx_get_lmac_link_state);
+
+const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+
+ if (bgx)
+ return bgx->lmac[lmacid].mac;
+
+ return NULL;
+}
+EXPORT_SYMBOL(bgx_get_lmac_mac);
+
+void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+
+ if (!bgx)
+ return;
+
+ ether_addr_copy(bgx->lmac[lmacid].mac, mac);
+}
+EXPORT_SYMBOL(bgx_set_lmac_mac);
+
+static void bgx_flush_dmac_cam_filter(struct bgx *bgx, int lmacid)
+{
+ struct lmac *lmac = NULL;
+ u8 idx = 0;
+
+ lmac = &bgx->lmac[lmacid];
+ /* reset CAM filters */
+ for (idx = 0; idx < lmac->dmacs_count; idx++)
+ bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM +
+ ((lmacid * lmac->dmacs_count) + idx) *
+ sizeof(u64), 0);
+}
+
+static void bgx_lmac_remove_filters(struct lmac *lmac, u8 vf_id)
+{
+ int i = 0;
+
+ if (!lmac)
+ return;
+
+ /* We've got reset filters request from some of attached VF, while the
+ * others might want to keep their configuration. So in this case lets
+ * iterate over all of configured filters and decrease number of
+ * referencies. if some addresses get zero refs remove them from list
+ */
+ for (i = lmac->dmacs_cfg - 1; i >= 0; i--) {
+ lmac->dmacs[i].vf_map &= ~BIT_ULL(vf_id);
+ if (!lmac->dmacs[i].vf_map) {
+ lmac->dmacs_cfg--;
+ lmac->dmacs[i].dmac = 0;
+ lmac->dmacs[i].vf_map = 0;
+ }
+ }
+}
+
+static int bgx_lmac_save_filter(struct lmac *lmac, u64 dmac, u8 vf_id)
+{
+ u8 i = 0;
+
+ if (!lmac)
+ return -1;
+
+ /* At the same time we could have several VFs 'attached' to some
+ * particular LMAC, and each VF is represented as network interface
+ * for kernel. So from user perspective it should be possible to
+ * manipulate with its' (VF) receive modes. However from PF
+ * driver perspective we need to keep track of filter configurations
+ * for different VFs to prevent filter values dupes
+ */
+ for (i = 0; i < lmac->dmacs_cfg; i++) {
+ if (lmac->dmacs[i].dmac == dmac) {
+ lmac->dmacs[i].vf_map |= BIT_ULL(vf_id);
+ return -1;
+ }
+ }
+
+ if (!(lmac->dmacs_cfg < lmac->dmacs_count))
+ return -1;
+
+ /* keep it for further tracking */
+ lmac->dmacs[lmac->dmacs_cfg].dmac = dmac;
+ lmac->dmacs[lmac->dmacs_cfg].vf_map = BIT_ULL(vf_id);
+ lmac->dmacs_cfg++;
+ return 0;
+}
+
+static int bgx_set_dmac_cam_filter_mac(struct bgx *bgx, int lmacid,
+ u64 cam_dmac, u8 idx)
+{
+ struct lmac *lmac = NULL;
+ u64 cfg = 0;
+
+ /* skip zero addresses as meaningless */
+ if (!cam_dmac || !bgx)
+ return -1;
+
+ lmac = &bgx->lmac[lmacid];
+
+ /* configure DCAM filtering for designated LMAC */
+ cfg = RX_DMACX_CAM_LMACID(lmacid & LMAC_ID_MASK) |
+ RX_DMACX_CAM_EN | cam_dmac;
+ bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM +
+ ((lmacid * lmac->dmacs_count) + idx) * sizeof(u64), cfg);
+ return 0;
+}
+
+void bgx_set_dmac_cam_filter(int node, int bgx_idx, int lmacid,
+ u64 cam_dmac, u8 vf_id)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+ struct lmac *lmac = NULL;
+
+ if (!bgx)
+ return;
+
+ lmac = &bgx->lmac[lmacid];
+
+ if (!cam_dmac)
+ cam_dmac = ether_addr_to_u64(lmac->mac);
+
+ /* since we might have several VFs attached to particular LMAC
+ * and kernel could call mcast config for each of them with the
+ * same MAC, check if requested MAC is already in filtering list and
+ * updare/prepare list of MACs to be applied later to HW filters
+ */
+ bgx_lmac_save_filter(lmac, cam_dmac, vf_id);
+}
+EXPORT_SYMBOL(bgx_set_dmac_cam_filter);
+
+void bgx_set_xcast_mode(int node, int bgx_idx, int lmacid, u8 mode)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+ struct lmac *lmac = NULL;
+ u64 cfg = 0;
+ u8 i = 0;
+
+ if (!bgx)
+ return;
+
+ lmac = &bgx->lmac[lmacid];
+
+ cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL);
+ if (mode & BGX_XCAST_BCAST_ACCEPT)
+ cfg |= BCAST_ACCEPT;
+ else
+ cfg &= ~BCAST_ACCEPT;
+
+ /* disable all MCASTs and DMAC filtering */
+ cfg &= ~(CAM_ACCEPT | BGX_MCAST_MODE(MCAST_MODE_MASK));
+
+ /* check requested bits and set filtergin mode appropriately */
+ if (mode & (BGX_XCAST_MCAST_ACCEPT)) {
+ cfg |= (BGX_MCAST_MODE(MCAST_MODE_ACCEPT));
+ } else if (mode & BGX_XCAST_MCAST_FILTER) {
+ cfg |= (BGX_MCAST_MODE(MCAST_MODE_CAM_FILTER) | CAM_ACCEPT);
+ for (i = 0; i < lmac->dmacs_cfg; i++)
+ bgx_set_dmac_cam_filter_mac(bgx, lmacid,
+ lmac->dmacs[i].dmac, i);
+ }
+ bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, cfg);
+}
+EXPORT_SYMBOL(bgx_set_xcast_mode);
+
+void bgx_reset_xcast_mode(int node, int bgx_idx, int lmacid, u8 vf_id)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+
+ if (!bgx)
+ return;
+
+ bgx_lmac_remove_filters(&bgx->lmac[lmacid], vf_id);
+ bgx_flush_dmac_cam_filter(bgx, lmacid);
+ bgx_set_xcast_mode(node, bgx_idx, lmacid,
+ (BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT));
+}
+EXPORT_SYMBOL(bgx_reset_xcast_mode);
+
+void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+ struct lmac *lmac;
+ u64 cfg;
+
+ if (!bgx)
+ return;
+ lmac = &bgx->lmac[lmacid];
+
+ cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+ if (enable) {
+ cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN;
+
+ /* enable TX FIFO Underflow interrupt */
+ bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1S,
+ GMI_TXX_INT_UNDFLW);
+ } else {
+ cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN);
+
+ /* Disable TX FIFO Underflow interrupt */
+ bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1C,
+ GMI_TXX_INT_UNDFLW);
+ }
+ bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+ if (bgx->is_rgx)
+ xcv_setup_link(enable ? lmac->link_up : 0, lmac->last_speed);
+}
+EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
+
+/* Enables or disables timestamp insertion by BGX for Rx packets */
+void bgx_config_timestamping(int node, int bgx_idx, int lmacid, bool enable)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+ struct lmac *lmac;
+ u64 csr_offset, cfg;
+
+ if (!bgx)
+ return;
+
+ lmac = &bgx->lmac[lmacid];
+
+ if (lmac->lmac_type == BGX_MODE_SGMII ||
+ lmac->lmac_type == BGX_MODE_QSGMII ||
+ lmac->lmac_type == BGX_MODE_RGMII)
+ csr_offset = BGX_GMP_GMI_RXX_FRM_CTL;
+ else
+ csr_offset = BGX_SMUX_RX_FRM_CTL;
+
+ cfg = bgx_reg_read(bgx, lmacid, csr_offset);
+
+ if (enable)
+ cfg |= BGX_PKT_RX_PTP_EN;
+ else
+ cfg &= ~BGX_PKT_RX_PTP_EN;
+ bgx_reg_write(bgx, lmacid, csr_offset, cfg);
+}
+EXPORT_SYMBOL(bgx_config_timestamping);
+
+void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause)
+{
+ struct pfc *pfc = (struct pfc *)pause;
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+ struct lmac *lmac;
+ u64 cfg;
+
+ if (!bgx)
+ return;
+ lmac = &bgx->lmac[lmacid];
+ if (lmac->is_sgmii)
+ return;
+
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_CBFC_CTL);
+ pfc->fc_rx = cfg & RX_EN;
+ pfc->fc_tx = cfg & TX_EN;
+ pfc->autoneg = 0;
+}
+EXPORT_SYMBOL(bgx_lmac_get_pfc);
+
+void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause)
+{
+ struct pfc *pfc = (struct pfc *)pause;
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+ struct lmac *lmac;
+ u64 cfg;
+
+ if (!bgx)
+ return;
+ lmac = &bgx->lmac[lmacid];
+ if (lmac->is_sgmii)
+ return;
+
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_CBFC_CTL);
+ cfg &= ~(RX_EN | TX_EN);
+ cfg |= (pfc->fc_rx ? RX_EN : 0x00);
+ cfg |= (pfc->fc_tx ? TX_EN : 0x00);
+ bgx_reg_write(bgx, lmacid, BGX_SMUX_CBFC_CTL, cfg);
+}
+EXPORT_SYMBOL(bgx_lmac_set_pfc);
+
+static void bgx_sgmii_change_link_state(struct lmac *lmac)
+{
+ struct bgx *bgx = lmac->bgx;
+ u64 cmr_cfg;
+ u64 port_cfg = 0;
+ u64 misc_ctl = 0;
+ bool tx_en, rx_en;
+
+ cmr_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_CMRX_CFG);
+ tx_en = cmr_cfg & CMR_PKT_TX_EN;
+ rx_en = cmr_cfg & CMR_PKT_RX_EN;
+ cmr_cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN);
+ bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg);
+
+ /* Wait for BGX RX to be idle */
+ if (bgx_poll_reg(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG,
+ GMI_PORT_CFG_RX_IDLE, false)) {
+ dev_err(&bgx->pdev->dev, "BGX%d LMAC%d GMI RX not idle\n",
+ bgx->bgx_id, lmac->lmacid);
+ return;
+ }
+
+ /* Wait for BGX TX to be idle */
+ if (bgx_poll_reg(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG,
+ GMI_PORT_CFG_TX_IDLE, false)) {
+ dev_err(&bgx->pdev->dev, "BGX%d LMAC%d GMI TX not idle\n",
+ bgx->bgx_id, lmac->lmacid);
+ return;
+ }
+
+ port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG);
+ misc_ctl = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_PCS_MISCX_CTL);
+
+ if (lmac->link_up) {
+ misc_ctl &= ~PCS_MISC_CTL_GMX_ENO;
+ port_cfg &= ~GMI_PORT_CFG_DUPLEX;
+ port_cfg |= (lmac->last_duplex << 2);
+ } else {
+ misc_ctl |= PCS_MISC_CTL_GMX_ENO;
+ }
+
+ switch (lmac->last_speed) {
+ case 10:
+ port_cfg &= ~GMI_PORT_CFG_SPEED; /* speed 0 */
+ port_cfg |= GMI_PORT_CFG_SPEED_MSB; /* speed_msb 1 */
+ port_cfg &= ~GMI_PORT_CFG_SLOT_TIME; /* slottime 0 */
+ misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK;
+ misc_ctl |= 50; /* samp_pt */
+ bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 64);
+ bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_BURST, 0);
+ break;
+ case 100:
+ port_cfg &= ~GMI_PORT_CFG_SPEED; /* speed 0 */
+ port_cfg &= ~GMI_PORT_CFG_SPEED_MSB; /* speed_msb 0 */
+ port_cfg &= ~GMI_PORT_CFG_SLOT_TIME; /* slottime 0 */
+ misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK;
+ misc_ctl |= 5; /* samp_pt */
+ bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 64);
+ bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_BURST, 0);
+ break;
+ case 1000:
+ port_cfg |= GMI_PORT_CFG_SPEED; /* speed 1 */
+ port_cfg &= ~GMI_PORT_CFG_SPEED_MSB; /* speed_msb 0 */
+ port_cfg |= GMI_PORT_CFG_SLOT_TIME; /* slottime 1 */
+ misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK;
+ misc_ctl |= 1; /* samp_pt */
+ bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 512);
+ if (lmac->last_duplex)
+ bgx_reg_write(bgx, lmac->lmacid,
+ BGX_GMP_GMI_TXX_BURST, 0);
+ else
+ bgx_reg_write(bgx, lmac->lmacid,
+ BGX_GMP_GMI_TXX_BURST, 8192);
+ break;
+ default:
+ break;
+ }
+ bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_PCS_MISCX_CTL, misc_ctl);
+ bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG, port_cfg);
+
+ /* Restore CMR config settings */
+ cmr_cfg |= (rx_en ? CMR_PKT_RX_EN : 0) | (tx_en ? CMR_PKT_TX_EN : 0);
+ bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg);
+
+ if (bgx->is_rgx && (cmr_cfg & (CMR_PKT_RX_EN | CMR_PKT_TX_EN)))
+ xcv_setup_link(lmac->link_up, lmac->last_speed);
+}
+
+static void bgx_lmac_handler(struct net_device *netdev)
+{
+ struct lmac *lmac = container_of(netdev, struct lmac, netdev);
+ struct phy_device *phydev;
+ int link_changed = 0;
+
+ if (!lmac)
+ return;
+
+ phydev = lmac->phydev;
+
+ if (!phydev->link && lmac->last_link)
+ link_changed = -1;
+
+ if (phydev->link &&
+ (lmac->last_duplex != phydev->duplex ||
+ lmac->last_link != phydev->link ||
+ lmac->last_speed != phydev->speed)) {
+ link_changed = 1;
+ }
+
+ lmac->last_link = phydev->link;
+ lmac->last_speed = phydev->speed;
+ lmac->last_duplex = phydev->duplex;
+
+ if (!link_changed)
+ return;
+
+ if (link_changed > 0)
+ lmac->link_up = true;
+ else
+ lmac->link_up = false;
+
+ if (lmac->is_sgmii)
+ bgx_sgmii_change_link_state(lmac);
+ else
+ bgx_xaui_check_link(lmac);
+}
+
+u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx)
+{
+ struct bgx *bgx;
+
+ bgx = get_bgx(node, bgx_idx);
+ if (!bgx)
+ return 0;
+
+ if (idx > 8)
+ lmac = 0;
+ return bgx_reg_read(bgx, lmac, BGX_CMRX_RX_STAT0 + (idx * 8));
+}
+EXPORT_SYMBOL(bgx_get_rx_stats);
+
+u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx)
+{
+ struct bgx *bgx;
+
+ bgx = get_bgx(node, bgx_idx);
+ if (!bgx)
+ return 0;
+
+ return bgx_reg_read(bgx, lmac, BGX_CMRX_TX_STAT0 + (idx * 8));
+}
+EXPORT_SYMBOL(bgx_get_tx_stats);
+
+/* Configure BGX LMAC in internal loopback mode */
+void bgx_lmac_internal_loopback(int node, int bgx_idx,
+ int lmac_idx, bool enable)
+{
+ struct bgx *bgx;
+ struct lmac *lmac;
+ u64 cfg;
+
+ bgx = get_bgx(node, bgx_idx);
+ if (!bgx)
+ return;
+
+ lmac = &bgx->lmac[lmac_idx];
+ if (lmac->is_sgmii) {
+ cfg = bgx_reg_read(bgx, lmac_idx, BGX_GMP_PCS_MRX_CTL);
+ if (enable)
+ cfg |= PCS_MRX_CTL_LOOPBACK1;
+ else
+ cfg &= ~PCS_MRX_CTL_LOOPBACK1;
+ bgx_reg_write(bgx, lmac_idx, BGX_GMP_PCS_MRX_CTL, cfg);
+ } else {
+ cfg = bgx_reg_read(bgx, lmac_idx, BGX_SPUX_CONTROL1);
+ if (enable)
+ cfg |= SPU_CTL_LOOPBACK;
+ else
+ cfg &= ~SPU_CTL_LOOPBACK;
+ bgx_reg_write(bgx, lmac_idx, BGX_SPUX_CONTROL1, cfg);
+ }
+}
+EXPORT_SYMBOL(bgx_lmac_internal_loopback);
+
+static int bgx_lmac_sgmii_init(struct bgx *bgx, struct lmac *lmac)
+{
+ int lmacid = lmac->lmacid;
+ u64 cfg;
+
+ bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_THRESH, 0x30);
+ /* max packet size */
+ bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_RXX_JABBER, MAX_FRAME_SIZE);
+
+ /* Disable frame alignment if using preamble */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND);
+ if (cfg & 1)
+ bgx_reg_write(bgx, lmacid, BGX_GMP_GMI_TXX_SGMII_CTL, 0);
+
+ /* Enable lmac */
+ bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
+
+ /* PCS reset */
+ bgx_reg_modify(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, PCS_MRX_CTL_RESET);
+ if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_CTL,
+ PCS_MRX_CTL_RESET, true)) {
+ dev_err(&bgx->pdev->dev, "BGX PCS reset not completed\n");
+ return -1;
+ }
+
+ /* power down, reset autoneg, autoneg enable */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MRX_CTL);
+ cfg &= ~PCS_MRX_CTL_PWR_DN;
+ cfg |= PCS_MRX_CTL_RST_AN;
+ if (lmac->phydev) {
+ cfg |= PCS_MRX_CTL_AN_EN;
+ } else {
+ /* In scenarios where PHY driver is not present or it's a
+ * non-standard PHY, FW sets AN_EN to inform Linux driver
+ * to do auto-neg and link polling or not.
+ */
+ if (cfg & PCS_MRX_CTL_AN_EN)
+ lmac->autoneg = true;
+ }
+ bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, cfg);
+
+ if (lmac->lmac_type == BGX_MODE_QSGMII) {
+ /* Disable disparity check for QSGMII */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL);
+ cfg &= ~PCS_MISC_CTL_DISP_EN;
+ bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL, cfg);
+ return 0;
+ }
+
+ if ((lmac->lmac_type == BGX_MODE_SGMII) && lmac->phydev) {
+ if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS,
+ PCS_MRX_STATUS_AN_CPT, false)) {
+ dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int bgx_lmac_xaui_init(struct bgx *bgx, struct lmac *lmac)
+{
+ u64 cfg;
+ int lmacid = lmac->lmacid;
+
+ /* Reset SPU */
+ bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET);
+ if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET, true)) {
+ dev_err(&bgx->pdev->dev, "BGX SPU reset not completed\n");
+ return -1;
+ }
+
+ /* Disable LMAC */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+ cfg &= ~CMR_EN;
+ bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+ bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER);
+ /* Set interleaved running disparity for RXAUI */
+ if (lmac->lmac_type == BGX_MODE_RXAUI)
+ bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL,
+ SPU_MISC_CTL_INTLV_RDISP);
+
+ /* Clear receive packet disable */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL);
+ cfg &= ~SPU_MISC_CTL_RX_DIS;
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg);
+
+ /* clear all interrupts */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_INT);
+ bgx_reg_write(bgx, lmacid, BGX_SMUX_RX_INT, cfg);
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_INT);
+ bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_INT, cfg);
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg);
+
+ if (lmac->use_training) {
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LP_CUP, 0x00);
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_CUP, 0x00);
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_REP, 0x00);
+ /* training enable */
+ bgx_reg_modify(bgx, lmacid,
+ BGX_SPUX_BR_PMD_CRTL, SPU_PMD_CRTL_TRAIN_EN);
+ }
+
+ /* Append FCS to each packet */
+ bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_APPEND, SMU_TX_APPEND_FCS_D);
+
+ /* Disable forward error correction */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_FEC_CONTROL);
+ cfg &= ~SPU_FEC_CTL_FEC_EN;
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_FEC_CONTROL, cfg);
+
+ /* Disable autoneg */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_CONTROL);
+ cfg = cfg & ~(SPU_AN_CTL_AN_EN | SPU_AN_CTL_XNP_EN);
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_CONTROL, cfg);
+
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_ADV);
+ if (lmac->lmac_type == BGX_MODE_10G_KR)
+ cfg |= (1 << 23);
+ else if (lmac->lmac_type == BGX_MODE_40G_KR)
+ cfg |= (1 << 24);
+ else
+ cfg &= ~((1 << 23) | (1 << 24));
+ cfg = cfg & (~((1ULL << 25) | (1ULL << 22) | (1ULL << 12)));
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_ADV, cfg);
+
+ cfg = bgx_reg_read(bgx, 0, BGX_SPU_DBG_CONTROL);
+ cfg &= ~SPU_DBG_CTL_AN_ARB_LINK_CHK_EN;
+ bgx_reg_write(bgx, 0, BGX_SPU_DBG_CONTROL, cfg);
+
+ /* Enable lmac */
+ bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
+
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_CONTROL1);
+ cfg &= ~SPU_CTL_LOW_POWER;
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_CONTROL1, cfg);
+
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_CTL);
+ cfg &= ~SMU_TX_CTL_UNI_EN;
+ cfg |= SMU_TX_CTL_DIC_EN;
+ bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_CTL, cfg);
+
+ /* Enable receive and transmission of pause frames */
+ bgx_reg_write(bgx, lmacid, BGX_SMUX_CBFC_CTL, ((0xffffULL << 32) |
+ BCK_EN | DRP_EN | TX_EN | RX_EN));
+ /* Configure pause time and interval */
+ bgx_reg_write(bgx, lmacid,
+ BGX_SMUX_TX_PAUSE_PKT_TIME, DEFAULT_PAUSE_TIME);
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_PAUSE_PKT_INTERVAL);
+ cfg &= ~0xFFFFull;
+ bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_PAUSE_PKT_INTERVAL,
+ cfg | (DEFAULT_PAUSE_TIME - 0x1000));
+ bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_PAUSE_ZERO, 0x01);
+
+ /* take lmac_count into account */
+ bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_THRESH, (0x100 - 1));
+ /* max packet size */
+ bgx_reg_modify(bgx, lmacid, BGX_SMUX_RX_JABBER, MAX_FRAME_SIZE);
+
+ return 0;
+}
+
+static int bgx_xaui_check_link(struct lmac *lmac)
+{
+ struct bgx *bgx = lmac->bgx;
+ int lmacid = lmac->lmacid;
+ int lmac_type = lmac->lmac_type;
+ u64 cfg;
+
+ if (lmac->use_training) {
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
+ if (!(cfg & (1ull << 13))) {
+ cfg = (1ull << 13) | (1ull << 14);
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg);
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL);
+ cfg |= (1ull << 0);
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL, cfg);
+ return -1;
+ }
+ }
+
+ /* wait for PCS to come out of reset */
+ if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET, true)) {
+ dev_err(&bgx->pdev->dev, "BGX SPU reset not completed\n");
+ return -1;
+ }
+
+ if ((lmac_type == BGX_MODE_10G_KR) || (lmac_type == BGX_MODE_XFI) ||
+ (lmac_type == BGX_MODE_40G_KR) || (lmac_type == BGX_MODE_XLAUI)) {
+ if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_BR_STATUS1,
+ SPU_BR_STATUS_BLK_LOCK, false)) {
+ dev_err(&bgx->pdev->dev,
+ "SPU_BR_STATUS_BLK_LOCK not completed\n");
+ return -1;
+ }
+ } else {
+ if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_BX_STATUS,
+ SPU_BX_STATUS_RX_ALIGN, false)) {
+ dev_err(&bgx->pdev->dev,
+ "SPU_BX_STATUS_RX_ALIGN not completed\n");
+ return -1;
+ }
+ }
+
+ /* Clear rcvflt bit (latching high) and read it back */
+ if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT)
+ bgx_reg_modify(bgx, lmacid,
+ BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT);
+ if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
+ dev_err(&bgx->pdev->dev, "Receive fault, retry training\n");
+ if (lmac->use_training) {
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
+ if (!(cfg & (1ull << 13))) {
+ cfg = (1ull << 13) | (1ull << 14);
+ bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg);
+ cfg = bgx_reg_read(bgx, lmacid,
+ BGX_SPUX_BR_PMD_CRTL);
+ cfg |= (1ull << 0);
+ bgx_reg_write(bgx, lmacid,
+ BGX_SPUX_BR_PMD_CRTL, cfg);
+ return -1;
+ }
+ }
+ return -1;
+ }
+
+ /* Wait for BGX RX to be idle */
+ if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_RX_IDLE, false)) {
+ dev_err(&bgx->pdev->dev, "SMU RX not idle\n");
+ return -1;
+ }
+
+ /* Wait for BGX TX to be idle */
+ if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_TX_IDLE, false)) {
+ dev_err(&bgx->pdev->dev, "SMU TX not idle\n");
+ return -1;
+ }
+
+ /* Check for MAC RX faults */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_CTL);
+ /* 0 - Link is okay, 1 - Local fault, 2 - Remote fault */
+ cfg &= SMU_RX_CTL_STATUS;
+ if (!cfg)
+ return 0;
+
+ /* Rx local/remote fault seen.
+ * Do lmac reinit to see if condition recovers
+ */
+ bgx_lmac_xaui_init(bgx, lmac);
+
+ return -1;
+}
+
+static void bgx_poll_for_sgmii_link(struct lmac *lmac)
+{
+ u64 pcs_link, an_result;
+ u8 speed;
+
+ pcs_link = bgx_reg_read(lmac->bgx, lmac->lmacid,
+ BGX_GMP_PCS_MRX_STATUS);
+
+ /*Link state bit is sticky, read it again*/
+ if (!(pcs_link & PCS_MRX_STATUS_LINK))
+ pcs_link = bgx_reg_read(lmac->bgx, lmac->lmacid,
+ BGX_GMP_PCS_MRX_STATUS);
+
+ if (bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_GMP_PCS_MRX_STATUS,
+ PCS_MRX_STATUS_AN_CPT, false)) {
+ lmac->link_up = false;
+ lmac->last_speed = SPEED_UNKNOWN;
+ lmac->last_duplex = DUPLEX_UNKNOWN;
+ goto next_poll;
+ }
+
+ lmac->link_up = ((pcs_link & PCS_MRX_STATUS_LINK) != 0) ? true : false;
+ an_result = bgx_reg_read(lmac->bgx, lmac->lmacid,
+ BGX_GMP_PCS_ANX_AN_RESULTS);
+
+ speed = (an_result >> 3) & 0x3;
+ lmac->last_duplex = (an_result >> 1) & 0x1;
+ switch (speed) {
+ case 0:
+ lmac->last_speed = 10;
+ break;
+ case 1:
+ lmac->last_speed = 100;
+ break;
+ case 2:
+ lmac->last_speed = 1000;
+ break;
+ default:
+ lmac->link_up = false;
+ lmac->last_speed = SPEED_UNKNOWN;
+ lmac->last_duplex = DUPLEX_UNKNOWN;
+ break;
+ }
+
+next_poll:
+
+ if (lmac->last_link != lmac->link_up) {
+ if (lmac->link_up)
+ bgx_sgmii_change_link_state(lmac);
+ lmac->last_link = lmac->link_up;
+ }
+
+ queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 3);
+}
+
+static void bgx_poll_for_link(struct work_struct *work)
+{
+ struct lmac *lmac;
+ u64 spu_link, smu_link;
+
+ lmac = container_of(work, struct lmac, dwork.work);
+ if (lmac->is_sgmii) {
+ bgx_poll_for_sgmii_link(lmac);
+ return;
+ }
+
+ /* Receive link is latching low. Force it high and verify it */
+ bgx_reg_modify(lmac->bgx, lmac->lmacid,
+ BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK);
+ bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1,
+ SPU_STATUS1_RCV_LNK, false);
+
+ spu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1);
+ smu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SMUX_RX_CTL);
+
+ if ((spu_link & SPU_STATUS1_RCV_LNK) &&
+ !(smu_link & SMU_RX_CTL_STATUS)) {
+ lmac->link_up = 1;
+ if (lmac->lmac_type == BGX_MODE_XLAUI)
+ lmac->last_speed = 40000;
+ else
+ lmac->last_speed = 10000;
+ lmac->last_duplex = 1;
+ } else {
+ lmac->link_up = 0;
+ lmac->last_speed = SPEED_UNKNOWN;
+ lmac->last_duplex = DUPLEX_UNKNOWN;
+ }
+
+ if (lmac->last_link != lmac->link_up) {
+ if (lmac->link_up) {
+ if (bgx_xaui_check_link(lmac)) {
+ /* Errors, clear link_up state */
+ lmac->link_up = 0;
+ lmac->last_speed = SPEED_UNKNOWN;
+ lmac->last_duplex = DUPLEX_UNKNOWN;
+ }
+ }
+ lmac->last_link = lmac->link_up;
+ }
+
+ queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2);
+}
+
+static int phy_interface_mode(u8 lmac_type)
+{
+ if (lmac_type == BGX_MODE_QSGMII)
+ return PHY_INTERFACE_MODE_QSGMII;
+ if (lmac_type == BGX_MODE_RGMII)
+ return PHY_INTERFACE_MODE_RGMII;
+
+ return PHY_INTERFACE_MODE_SGMII;
+}
+
+static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
+{
+ struct lmac *lmac;
+ u64 cfg;
+
+ lmac = &bgx->lmac[lmacid];
+ lmac->bgx = bgx;
+
+ if ((lmac->lmac_type == BGX_MODE_SGMII) ||
+ (lmac->lmac_type == BGX_MODE_QSGMII) ||
+ (lmac->lmac_type == BGX_MODE_RGMII)) {
+ lmac->is_sgmii = 1;
+ if (bgx_lmac_sgmii_init(bgx, lmac))
+ return -1;
+ } else {
+ lmac->is_sgmii = 0;
+ if (bgx_lmac_xaui_init(bgx, lmac))
+ return -1;
+ }
+
+ if (lmac->is_sgmii) {
+ cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND);
+ cfg |= ((1ull << 2) | (1ull << 1)); /* FCS and PAD */
+ bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND, cfg);
+ bgx_reg_write(bgx, lmacid, BGX_GMP_GMI_TXX_MIN_PKT, 60 - 1);
+ } else {
+ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_APPEND);
+ cfg |= ((1ull << 2) | (1ull << 1)); /* FCS and PAD */
+ bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_APPEND, cfg);
+ bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_MIN_PKT, 60 + 4);
+ }
+
+ /* actual number of filters available to exact LMAC */
+ lmac->dmacs_count = (RX_DMAC_COUNT / bgx->lmac_count);
+ lmac->dmacs = kcalloc(lmac->dmacs_count, sizeof(*lmac->dmacs),
+ GFP_KERNEL);
+ if (!lmac->dmacs)
+ return -ENOMEM;
+
+ /* Enable lmac */
+ bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
+
+ /* Restore default cfg, incase low level firmware changed it */
+ bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03);
+
+ if ((lmac->lmac_type != BGX_MODE_XFI) &&
+ (lmac->lmac_type != BGX_MODE_XLAUI) &&
+ (lmac->lmac_type != BGX_MODE_40G_KR) &&
+ (lmac->lmac_type != BGX_MODE_10G_KR)) {
+ if (!lmac->phydev) {
+ if (lmac->autoneg) {
+ bgx_reg_write(bgx, lmacid,
+ BGX_GMP_PCS_LINKX_TIMER,
+ PCS_LINKX_TIMER_COUNT);
+ goto poll;
+ } else {
+ /* Default to below link speed and duplex */
+ lmac->link_up = true;
+ lmac->last_speed = 1000;
+ lmac->last_duplex = 1;
+ bgx_sgmii_change_link_state(lmac);
+ return 0;
+ }
+ }
+ lmac->phydev->dev_flags = 0;
+
+ if (phy_connect_direct(&lmac->netdev, lmac->phydev,
+ bgx_lmac_handler,
+ phy_interface_mode(lmac->lmac_type)))
+ return -ENODEV;
+
+ phy_start(lmac->phydev);
+ return 0;
+ }
+
+poll:
+ lmac->check_link = alloc_workqueue("check_link", WQ_UNBOUND |
+ WQ_MEM_RECLAIM, 1);
+ if (!lmac->check_link)
+ return -ENOMEM;
+ INIT_DELAYED_WORK(&lmac->dwork, bgx_poll_for_link);
+ queue_delayed_work(lmac->check_link, &lmac->dwork, 0);
+
+ return 0;
+}
+
+static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
+{
+ struct lmac *lmac;
+ u64 cfg;
+
+ lmac = &bgx->lmac[lmacid];
+ if (lmac->check_link) {
+ /* Destroy work queue */
+ cancel_delayed_work_sync(&lmac->dwork);
+ destroy_workqueue(lmac->check_link);
+ }
+
+ /* Disable packet reception */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+ cfg &= ~CMR_PKT_RX_EN;
+ bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+ /* Give chance for Rx/Tx FIFO to get drained */
+ bgx_poll_reg(bgx, lmacid, BGX_CMRX_RX_FIFO_LEN, (u64)0x1FFF, true);
+ bgx_poll_reg(bgx, lmacid, BGX_CMRX_TX_FIFO_LEN, (u64)0x3FFF, true);
+
+ /* Disable packet transmission */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+ cfg &= ~CMR_PKT_TX_EN;
+ bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+ /* Disable serdes lanes */
+ if (!lmac->is_sgmii)
+ bgx_reg_modify(bgx, lmacid,
+ BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER);
+ else
+ bgx_reg_modify(bgx, lmacid,
+ BGX_GMP_PCS_MRX_CTL, PCS_MRX_CTL_PWR_DN);
+
+ /* Disable LMAC */
+ cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+ cfg &= ~CMR_EN;
+ bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+ bgx_flush_dmac_cam_filter(bgx, lmacid);
+ kfree(lmac->dmacs);
+
+ if ((lmac->lmac_type != BGX_MODE_XFI) &&
+ (lmac->lmac_type != BGX_MODE_XLAUI) &&
+ (lmac->lmac_type != BGX_MODE_40G_KR) &&
+ (lmac->lmac_type != BGX_MODE_10G_KR) && lmac->phydev)
+ phy_disconnect(lmac->phydev);
+
+ lmac->phydev = NULL;
+}
+
+static void bgx_init_hw(struct bgx *bgx)
+{
+ int i;
+ struct lmac *lmac;
+
+ bgx_reg_modify(bgx, 0, BGX_CMR_GLOBAL_CFG, CMR_GLOBAL_CFG_FCS_STRIP);
+ if (bgx_reg_read(bgx, 0, BGX_CMR_BIST_STATUS))
+ dev_err(&bgx->pdev->dev, "BGX%d BIST failed\n", bgx->bgx_id);
+
+ /* Set lmac type and lane2serdes mapping */
+ for (i = 0; i < bgx->lmac_count; i++) {
+ lmac = &bgx->lmac[i];
+ bgx_reg_write(bgx, i, BGX_CMRX_CFG,
+ (lmac->lmac_type << 8) | lmac->lane_to_sds);
+ bgx->lmac[i].lmacid_bd = lmac_count;
+ lmac_count++;
+ }
+
+ bgx_reg_write(bgx, 0, BGX_CMR_TX_LMACS, bgx->lmac_count);
+ bgx_reg_write(bgx, 0, BGX_CMR_RX_LMACS, bgx->lmac_count);
+
+ /* Set the backpressure AND mask */
+ for (i = 0; i < bgx->lmac_count; i++)
+ bgx_reg_modify(bgx, 0, BGX_CMR_CHAN_MSK_AND,
+ ((1ULL << MAX_BGX_CHANS_PER_LMAC) - 1) <<
+ (i * MAX_BGX_CHANS_PER_LMAC));
+
+ /* Disable all MAC filtering */
+ for (i = 0; i < RX_DMAC_COUNT; i++)
+ bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + (i * 8), 0x00);
+
+ /* Disable MAC steering (NCSI traffic) */
+ for (i = 0; i < RX_TRAFFIC_STEER_RULE_COUNT; i++)
+ bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00);
+}
+
+static u8 bgx_get_lane2sds_cfg(struct bgx *bgx, struct lmac *lmac)
+{
+ return (u8)(bgx_reg_read(bgx, lmac->lmacid, BGX_CMRX_CFG) & 0xFF);
+}
+
+static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid)
+{
+ struct device *dev = &bgx->pdev->dev;
+ struct lmac *lmac;
+ char str[27];
+
+ if (!bgx->is_dlm && lmacid)
+ return;
+
+ lmac = &bgx->lmac[lmacid];
+ if (!bgx->is_dlm)
+ sprintf(str, "BGX%d QLM mode", bgx->bgx_id);
+ else
+ sprintf(str, "BGX%d LMAC%d mode", bgx->bgx_id, lmacid);
+
+ switch (lmac->lmac_type) {
+ case BGX_MODE_SGMII:
+ dev_info(dev, "%s: SGMII\n", (char *)str);
+ break;
+ case BGX_MODE_XAUI:
+ dev_info(dev, "%s: XAUI\n", (char *)str);
+ break;
+ case BGX_MODE_RXAUI:
+ dev_info(dev, "%s: RXAUI\n", (char *)str);
+ break;
+ case BGX_MODE_XFI:
+ if (!lmac->use_training)
+ dev_info(dev, "%s: XFI\n", (char *)str);
+ else
+ dev_info(dev, "%s: 10G_KR\n", (char *)str);
+ break;
+ case BGX_MODE_XLAUI:
+ if (!lmac->use_training)
+ dev_info(dev, "%s: XLAUI\n", (char *)str);
+ else
+ dev_info(dev, "%s: 40G_KR4\n", (char *)str);
+ break;
+ case BGX_MODE_QSGMII:
+ dev_info(dev, "%s: QSGMII\n", (char *)str);
+ break;
+ case BGX_MODE_RGMII:
+ dev_info(dev, "%s: RGMII\n", (char *)str);
+ break;
+ case BGX_MODE_INVALID:
+ /* Nothing to do */
+ break;
+ }
+}
+
+static void lmac_set_lane2sds(struct bgx *bgx, struct lmac *lmac)
+{
+ switch (lmac->lmac_type) {
+ case BGX_MODE_SGMII:
+ case BGX_MODE_XFI:
+ lmac->lane_to_sds = lmac->lmacid;
+ break;
+ case BGX_MODE_XAUI:
+ case BGX_MODE_XLAUI:
+ case BGX_MODE_RGMII:
+ lmac->lane_to_sds = 0xE4;
+ break;
+ case BGX_MODE_RXAUI:
+ lmac->lane_to_sds = (lmac->lmacid) ? 0xE : 0x4;
+ break;
+ case BGX_MODE_QSGMII:
+ /* There is no way to determine if DLM0/2 is QSGMII or
+ * DLM1/3 is configured to QSGMII as bootloader will
+ * configure all LMACs, so take whatever is configured
+ * by low level firmware.
+ */
+ lmac->lane_to_sds = bgx_get_lane2sds_cfg(bgx, lmac);
+ break;
+ default:
+ lmac->lane_to_sds = 0;
+ break;
+ }
+}
+
+static void lmac_set_training(struct bgx *bgx, struct lmac *lmac, int lmacid)
+{
+ if ((lmac->lmac_type != BGX_MODE_10G_KR) &&
+ (lmac->lmac_type != BGX_MODE_40G_KR)) {
+ lmac->use_training = 0;
+ return;
+ }
+
+ lmac->use_training = bgx_reg_read(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL) &
+ SPU_PMD_CRTL_TRAIN_EN;
+}
+
+static void bgx_set_lmac_config(struct bgx *bgx, u8 idx)
+{
+ struct lmac *lmac;
+ u64 cmr_cfg;
+ u8 lmac_type;
+ u8 lane_to_sds;
+
+ lmac = &bgx->lmac[idx];
+
+ if (!bgx->is_dlm || bgx->is_rgx) {
+ /* Read LMAC0 type to figure out QLM mode
+ * This is configured by low level firmware
+ */
+ cmr_cfg = bgx_reg_read(bgx, 0, BGX_CMRX_CFG);
+ lmac->lmac_type = (cmr_cfg >> 8) & 0x07;
+ if (bgx->is_rgx)
+ lmac->lmac_type = BGX_MODE_RGMII;
+ lmac_set_training(bgx, lmac, 0);
+ lmac_set_lane2sds(bgx, lmac);
+ return;
+ }
+
+ /* For DLMs or SLMs on 80/81/83xx so many lane configurations
+ * are possible and vary across boards. Also Kernel doesn't have
+ * any way to identify board type/info and since firmware does,
+ * just take lmac type and serdes lane config as is.
+ */
+ cmr_cfg = bgx_reg_read(bgx, idx, BGX_CMRX_CFG);
+ lmac_type = (u8)((cmr_cfg >> 8) & 0x07);
+ lane_to_sds = (u8)(cmr_cfg & 0xFF);
+ /* Check if config is reset value */
+ if ((lmac_type == 0) && (lane_to_sds == 0xE4))
+ lmac->lmac_type = BGX_MODE_INVALID;
+ else
+ lmac->lmac_type = lmac_type;
+ lmac->lane_to_sds = lane_to_sds;
+ lmac_set_training(bgx, lmac, lmac->lmacid);
+}
+
+static void bgx_get_qlm_mode(struct bgx *bgx)
+{
+ struct lmac *lmac;
+ u8 idx;
+
+ /* Init all LMAC's type to invalid */
+ for (idx = 0; idx < bgx->max_lmac; idx++) {
+ lmac = &bgx->lmac[idx];
+ lmac->lmacid = idx;
+ lmac->lmac_type = BGX_MODE_INVALID;
+ lmac->use_training = false;
+ }
+
+ /* It is assumed that low level firmware sets this value */
+ bgx->lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7;
+ if (bgx->lmac_count > bgx->max_lmac)
+ bgx->lmac_count = bgx->max_lmac;
+
+ for (idx = 0; idx < bgx->lmac_count; idx++) {
+ bgx_set_lmac_config(bgx, idx);
+ bgx_print_qlm_mode(bgx, idx);
+ }
+}
+
+#ifdef CONFIG_ACPI
+
+static int acpi_get_mac_address(struct device *dev, struct acpi_device *adev,
+ u8 *dst)
+{
+ u8 mac[ETH_ALEN];
+ int ret;
+
+ ret = fwnode_property_read_u8_array(acpi_fwnode_handle(adev),
+ "mac-address", mac, ETH_ALEN);
+ if (ret)
+ goto out;
+
+ if (!is_valid_ether_addr(mac)) {
+ dev_err(dev, "MAC address invalid: %pM\n", mac);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ dev_info(dev, "MAC address set to: %pM\n", mac);
+
+ memcpy(dst, mac, ETH_ALEN);
+out:
+ return ret;
+}
+
+/* Currently only sets the MAC address. */
+static acpi_status bgx_acpi_register_phy(acpi_handle handle,
+ u32 lvl, void *context, void **rv)
+{
+ struct bgx *bgx = context;
+ struct device *dev = &bgx->pdev->dev;
+ struct acpi_device *adev;
+
+ if (acpi_bus_get_device(handle, &adev))
+ goto out;
+
+ acpi_get_mac_address(dev, adev, bgx->lmac[bgx->acpi_lmac_idx].mac);
+
+ SET_NETDEV_DEV(&bgx->lmac[bgx->acpi_lmac_idx].netdev, dev);
+
+ bgx->lmac[bgx->acpi_lmac_idx].lmacid = bgx->acpi_lmac_idx;
+ bgx->acpi_lmac_idx++; /* move to next LMAC */
+out:
+ return AE_OK;
+}
+
+static acpi_status bgx_acpi_match_id(acpi_handle handle, u32 lvl,
+ void *context, void **ret_val)
+{
+ struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct bgx *bgx = context;
+ char bgx_sel[5];
+
+ snprintf(bgx_sel, 5, "BGX%d", bgx->bgx_id);
+ if (ACPI_FAILURE(acpi_get_name(handle, ACPI_SINGLE_NAME, &string))) {
+ pr_warn("Invalid link device\n");
+ return AE_OK;
+ }
+
+ if (strncmp(string.pointer, bgx_sel, 4))
+ return AE_OK;
+
+ acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+ bgx_acpi_register_phy, NULL, bgx, NULL);
+
+ kfree(string.pointer);
+ return AE_CTRL_TERMINATE;
+}
+
+static int bgx_init_acpi_phy(struct bgx *bgx)
+{
+ acpi_get_devices(NULL, bgx_acpi_match_id, bgx, (void **)NULL);
+ return 0;
+}
+
+#else
+
+static int bgx_init_acpi_phy(struct bgx *bgx)
+{
+ return -ENODEV;
+}
+
+#endif /* CONFIG_ACPI */
+
+#if IS_ENABLED(CONFIG_OF_MDIO)
+
+static int bgx_init_of_phy(struct bgx *bgx)
+{
+ struct fwnode_handle *fwn;
+ struct device_node *node = NULL;
+ u8 lmac = 0;
+
+ device_for_each_child_node(&bgx->pdev->dev, fwn) {
+ struct phy_device *pd;
+ struct device_node *phy_np;
+ const char *mac;
+
+ /* Should always be an OF node. But if it is not, we
+ * cannot handle it, so exit the loop.
+ */
+ node = to_of_node(fwn);
+ if (!node)
+ break;
+
+ mac = of_get_mac_address(node);
+ if (mac)
+ ether_addr_copy(bgx->lmac[lmac].mac, mac);
+
+ SET_NETDEV_DEV(&bgx->lmac[lmac].netdev, &bgx->pdev->dev);
+ bgx->lmac[lmac].lmacid = lmac;
+
+ phy_np = of_parse_phandle(node, "phy-handle", 0);
+ /* If there is no phy or defective firmware presents
+ * this cortina phy, for which there is no driver
+ * support, ignore it.
+ */
+ if (phy_np &&
+ !of_device_is_compatible(phy_np, "cortina,cs4223-slice")) {
+ /* Wait until the phy drivers are available */
+ pd = of_phy_find_device(phy_np);
+ if (!pd)
+ goto defer;
+ bgx->lmac[lmac].phydev = pd;
+ }
+
+ lmac++;
+ if (lmac == bgx->max_lmac) {
+ of_node_put(node);
+ break;
+ }
+ }
+ return 0;
+
+defer:
+ /* We are bailing out, try not to leak device reference counts
+ * for phy devices we may have already found.
+ */
+ while (lmac) {
+ if (bgx->lmac[lmac].phydev) {
+ put_device(&bgx->lmac[lmac].phydev->mdio.dev);
+ bgx->lmac[lmac].phydev = NULL;
+ }
+ lmac--;
+ }
+ of_node_put(node);
+ return -EPROBE_DEFER;
+}
+
+#else
+
+static int bgx_init_of_phy(struct bgx *bgx)
+{
+ return -ENODEV;
+}
+
+#endif /* CONFIG_OF_MDIO */
+
+static int bgx_init_phy(struct bgx *bgx)
+{
+ if (!acpi_disabled)
+ return bgx_init_acpi_phy(bgx);
+
+ return bgx_init_of_phy(bgx);
+}
+
+static irqreturn_t bgx_intr_handler(int irq, void *data)
+{
+ struct bgx *bgx = (struct bgx *)data;
+ u64 status, val;
+ int lmac;
+
+ for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
+ status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT);
+ if (status & GMI_TXX_INT_UNDFLW) {
+ pci_err(bgx->pdev, "BGX%d lmac%d UNDFLW\n",
+ bgx->bgx_id, lmac);
+ val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG);
+ val &= ~CMR_EN;
+ bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
+ val |= CMR_EN;
+ bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
+ }
+ /* clear interrupts */
+ bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void bgx_register_intr(struct pci_dev *pdev)
+{
+ struct bgx *bgx = pci_get_drvdata(pdev);
+ int ret;
+
+ ret = pci_alloc_irq_vectors(pdev, BGX_LMAC_VEC_OFFSET,
+ BGX_LMAC_VEC_OFFSET, PCI_IRQ_ALL_TYPES);
+ if (ret < 0) {
+ pci_err(pdev, "Req for #%d msix vectors failed\n",
+ BGX_LMAC_VEC_OFFSET);
+ return;
+ }
+ ret = pci_request_irq(pdev, GMPX_GMI_TX_INT, bgx_intr_handler, NULL,
+ bgx, "BGX%d", bgx->bgx_id);
+ if (ret)
+ pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx);
+}
+
+static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ int err;
+ struct device *dev = &pdev->dev;
+ struct bgx *bgx = NULL;
+ u8 lmac;
+ u16 sdevid;
+
+ bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL);
+ if (!bgx)
+ return -ENOMEM;
+ bgx->pdev = pdev;
+
+ pci_set_drvdata(pdev, bgx);
+
+ err = pcim_enable_device(pdev);
+ if (err) {
+ dev_err(dev, "Failed to enable PCI device\n");
+ pci_set_drvdata(pdev, NULL);
+ return err;
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ dev_err(dev, "PCI request regions failed 0x%x\n", err);
+ goto err_disable_device;
+ }
+
+ /* MAP configuration registers */
+ bgx->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+ if (!bgx->reg_base) {
+ dev_err(dev, "BGX: Cannot map CSR memory space, aborting\n");
+ err = -ENOMEM;
+ goto err_release_regions;
+ }
+
+ set_max_bgx_per_node(pdev);
+
+ pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid);
+ if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) {
+ bgx->bgx_id = (pci_resource_start(pdev,
+ PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK;
+ bgx->bgx_id += nic_get_node_id(pdev) * max_bgx_per_node;
+ bgx->max_lmac = MAX_LMAC_PER_BGX;
+ bgx_vnic[bgx->bgx_id] = bgx;
+ } else {
+ bgx->is_rgx = true;
+ bgx->max_lmac = 1;
+ bgx->bgx_id = MAX_BGX_PER_CN81XX - 1;
+ bgx_vnic[bgx->bgx_id] = bgx;
+ xcv_init_hw();
+ }
+
+ /* On 81xx all are DLMs and on 83xx there are 3 BGX QLMs and one
+ * BGX i.e BGX2 can be split across 2 DLMs.
+ */
+ pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
+ if ((sdevid == PCI_SUBSYS_DEVID_81XX_BGX) ||
+ ((sdevid == PCI_SUBSYS_DEVID_83XX_BGX) && (bgx->bgx_id == 2)))
+ bgx->is_dlm = true;
+
+ bgx_get_qlm_mode(bgx);
+
+ err = bgx_init_phy(bgx);
+ if (err)
+ goto err_enable;
+
+ bgx_init_hw(bgx);
+
+ bgx_register_intr(pdev);
+
+ /* Enable all LMACs */
+ for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
+ err = bgx_lmac_enable(bgx, lmac);
+ if (err) {
+ dev_err(dev, "BGX%d failed to enable lmac%d\n",
+ bgx->bgx_id, lmac);
+ while (lmac)
+ bgx_lmac_disable(bgx, --lmac);
+ goto err_enable;
+ }
+ }
+
+ return 0;
+
+err_enable:
+ bgx_vnic[bgx->bgx_id] = NULL;
+ pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx);
+err_release_regions:
+ pci_release_regions(pdev);
+err_disable_device:
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ return err;
+}
+
+static void bgx_remove(struct pci_dev *pdev)
+{
+ struct bgx *bgx = pci_get_drvdata(pdev);
+ u8 lmac;
+
+ /* Disable all LMACs */
+ for (lmac = 0; lmac < bgx->lmac_count; lmac++)
+ bgx_lmac_disable(bgx, lmac);
+
+ pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx);
+
+ bgx_vnic[bgx->bgx_id] = NULL;
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+static struct pci_driver bgx_driver = {
+ .name = DRV_NAME,
+ .id_table = bgx_id_table,
+ .probe = bgx_probe,
+ .remove = bgx_remove,
+};
+
+static int __init bgx_init_module(void)
+{
+ pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
+
+ return pci_register_driver(&bgx_driver);
+}
+
+static void __exit bgx_cleanup_module(void)
+{
+ pci_unregister_driver(&bgx_driver);
+}
+
+module_init(bgx_init_module);
+module_exit(bgx_cleanup_module);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
new file mode 100644
index 000000000..ac0c89cd5
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef THUNDER_BGX_H
+#define THUNDER_BGX_H
+
+/* PCI device ID */
+#define PCI_DEVICE_ID_THUNDER_BGX 0xA026
+#define PCI_DEVICE_ID_THUNDER_RGX 0xA054
+
+/* Subsystem device IDs */
+#define PCI_SUBSYS_DEVID_88XX_BGX 0xA126
+#define PCI_SUBSYS_DEVID_81XX_BGX 0xA226
+#define PCI_SUBSYS_DEVID_81XX_RGX 0xA254
+#define PCI_SUBSYS_DEVID_83XX_BGX 0xA326
+
+#define MAX_BGX_THUNDER 8 /* Max 2 nodes, 4 per node */
+#define MAX_BGX_PER_CN88XX 2
+#define MAX_BGX_PER_CN81XX 3 /* 2 BGXs + 1 RGX */
+#define MAX_BGX_PER_CN83XX 4
+#define MAX_LMAC_PER_BGX 4
+#define MAX_BGX_CHANS_PER_LMAC 16
+#define MAX_DMAC_PER_LMAC 8
+#define MAX_FRAME_SIZE 9216
+#define DEFAULT_PAUSE_TIME 0xFFFF
+
+#define BGX_ID_MASK 0x3
+#define LMAC_ID_MASK 0x3
+
+#define MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE 2
+
+/* Registers */
+#define BGX_CMRX_CFG 0x00
+#define CMR_PKT_TX_EN BIT_ULL(13)
+#define CMR_PKT_RX_EN BIT_ULL(14)
+#define CMR_EN BIT_ULL(15)
+#define BGX_CMR_GLOBAL_CFG 0x08
+#define CMR_GLOBAL_CFG_FCS_STRIP BIT_ULL(6)
+#define BGX_CMRX_RX_ID_MAP 0x60
+#define BGX_CMRX_RX_STAT0 0x70
+#define BGX_CMRX_RX_STAT1 0x78
+#define BGX_CMRX_RX_STAT2 0x80
+#define BGX_CMRX_RX_STAT3 0x88
+#define BGX_CMRX_RX_STAT4 0x90
+#define BGX_CMRX_RX_STAT5 0x98
+#define BGX_CMRX_RX_STAT6 0xA0
+#define BGX_CMRX_RX_STAT7 0xA8
+#define BGX_CMRX_RX_STAT8 0xB0
+#define BGX_CMRX_RX_STAT9 0xB8
+#define BGX_CMRX_RX_STAT10 0xC0
+#define BGX_CMRX_RX_BP_DROP 0xC8
+#define BGX_CMRX_RX_DMAC_CTL 0x0E8
+#define BGX_CMRX_RX_FIFO_LEN 0x108
+#define BGX_CMR_RX_DMACX_CAM 0x200
+#define RX_DMACX_CAM_EN BIT_ULL(48)
+#define RX_DMACX_CAM_LMACID(x) (((u64)x) << 49)
+#define RX_DMAC_COUNT 32
+#define BGX_CMR_RX_STREERING 0x300
+#define RX_TRAFFIC_STEER_RULE_COUNT 8
+#define BGX_CMR_CHAN_MSK_AND 0x450
+#define BGX_CMR_BIST_STATUS 0x460
+#define BGX_CMR_RX_LMACS 0x468
+#define BGX_CMRX_TX_FIFO_LEN 0x518
+#define BGX_CMRX_TX_STAT0 0x600
+#define BGX_CMRX_TX_STAT1 0x608
+#define BGX_CMRX_TX_STAT2 0x610
+#define BGX_CMRX_TX_STAT3 0x618
+#define BGX_CMRX_TX_STAT4 0x620
+#define BGX_CMRX_TX_STAT5 0x628
+#define BGX_CMRX_TX_STAT6 0x630
+#define BGX_CMRX_TX_STAT7 0x638
+#define BGX_CMRX_TX_STAT8 0x640
+#define BGX_CMRX_TX_STAT9 0x648
+#define BGX_CMRX_TX_STAT10 0x650
+#define BGX_CMRX_TX_STAT11 0x658
+#define BGX_CMRX_TX_STAT12 0x660
+#define BGX_CMRX_TX_STAT13 0x668
+#define BGX_CMRX_TX_STAT14 0x670
+#define BGX_CMRX_TX_STAT15 0x678
+#define BGX_CMRX_TX_STAT16 0x680
+#define BGX_CMRX_TX_STAT17 0x688
+#define BGX_CMR_TX_LMACS 0x1000
+
+#define BGX_SPUX_CONTROL1 0x10000
+#define SPU_CTL_LOW_POWER BIT_ULL(11)
+#define SPU_CTL_LOOPBACK BIT_ULL(14)
+#define SPU_CTL_RESET BIT_ULL(15)
+#define BGX_SPUX_STATUS1 0x10008
+#define SPU_STATUS1_RCV_LNK BIT_ULL(2)
+#define BGX_SPUX_STATUS2 0x10020
+#define SPU_STATUS2_RCVFLT BIT_ULL(10)
+#define BGX_SPUX_BX_STATUS 0x10028
+#define SPU_BX_STATUS_RX_ALIGN BIT_ULL(12)
+#define BGX_SPUX_BR_STATUS1 0x10030
+#define SPU_BR_STATUS_BLK_LOCK BIT_ULL(0)
+#define SPU_BR_STATUS_RCV_LNK BIT_ULL(12)
+#define BGX_SPUX_BR_PMD_CRTL 0x10068
+#define SPU_PMD_CRTL_TRAIN_EN BIT_ULL(1)
+#define BGX_SPUX_BR_PMD_LP_CUP 0x10078
+#define BGX_SPUX_BR_PMD_LD_CUP 0x10088
+#define BGX_SPUX_BR_PMD_LD_REP 0x10090
+#define BGX_SPUX_FEC_CONTROL 0x100A0
+#define SPU_FEC_CTL_FEC_EN BIT_ULL(0)
+#define SPU_FEC_CTL_ERR_EN BIT_ULL(1)
+#define BGX_SPUX_AN_CONTROL 0x100C8
+#define SPU_AN_CTL_AN_EN BIT_ULL(12)
+#define SPU_AN_CTL_XNP_EN BIT_ULL(13)
+#define BGX_SPUX_AN_ADV 0x100D8
+#define BGX_SPUX_MISC_CONTROL 0x10218
+#define SPU_MISC_CTL_INTLV_RDISP BIT_ULL(10)
+#define SPU_MISC_CTL_RX_DIS BIT_ULL(12)
+#define BGX_SPUX_INT 0x10220 /* +(0..3) << 20 */
+#define BGX_SPUX_INT_W1S 0x10228
+#define BGX_SPUX_INT_ENA_W1C 0x10230
+#define BGX_SPUX_INT_ENA_W1S 0x10238
+#define BGX_SPU_DBG_CONTROL 0x10300
+#define SPU_DBG_CTL_AN_ARB_LINK_CHK_EN BIT_ULL(18)
+#define SPU_DBG_CTL_AN_NONCE_MCT_DIS BIT_ULL(29)
+
+#define BGX_SMUX_RX_INT 0x20000
+#define BGX_SMUX_RX_FRM_CTL 0x20020
+#define BGX_PKT_RX_PTP_EN BIT_ULL(12)
+#define BGX_SMUX_RX_JABBER 0x20030
+#define BGX_SMUX_RX_CTL 0x20048
+#define SMU_RX_CTL_STATUS (3ull << 0)
+#define BGX_SMUX_TX_APPEND 0x20100
+#define SMU_TX_APPEND_FCS_D BIT_ULL(2)
+#define BGX_SMUX_TX_PAUSE_PKT_TIME 0x20110
+#define BGX_SMUX_TX_MIN_PKT 0x20118
+#define BGX_SMUX_TX_PAUSE_PKT_INTERVAL 0x20120
+#define BGX_SMUX_TX_PAUSE_ZERO 0x20138
+#define BGX_SMUX_TX_INT 0x20140
+#define BGX_SMUX_TX_CTL 0x20178
+#define SMU_TX_CTL_DIC_EN BIT_ULL(0)
+#define SMU_TX_CTL_UNI_EN BIT_ULL(1)
+#define SMU_TX_CTL_LNK_STATUS (3ull << 4)
+#define BGX_SMUX_TX_THRESH 0x20180
+#define BGX_SMUX_CTL 0x20200
+#define SMU_CTL_RX_IDLE BIT_ULL(0)
+#define SMU_CTL_TX_IDLE BIT_ULL(1)
+#define BGX_SMUX_CBFC_CTL 0x20218
+#define RX_EN BIT_ULL(0)
+#define TX_EN BIT_ULL(1)
+#define BCK_EN BIT_ULL(2)
+#define DRP_EN BIT_ULL(3)
+
+#define BGX_GMP_PCS_MRX_CTL 0x30000
+#define PCS_MRX_CTL_RST_AN BIT_ULL(9)
+#define PCS_MRX_CTL_PWR_DN BIT_ULL(11)
+#define PCS_MRX_CTL_AN_EN BIT_ULL(12)
+#define PCS_MRX_CTL_LOOPBACK1 BIT_ULL(14)
+#define PCS_MRX_CTL_RESET BIT_ULL(15)
+#define BGX_GMP_PCS_MRX_STATUS 0x30008
+#define PCS_MRX_STATUS_LINK BIT_ULL(2)
+#define PCS_MRX_STATUS_AN_CPT BIT_ULL(5)
+#define BGX_GMP_PCS_ANX_ADV 0x30010
+#define BGX_GMP_PCS_ANX_AN_RESULTS 0x30020
+#define BGX_GMP_PCS_LINKX_TIMER 0x30040
+#define PCS_LINKX_TIMER_COUNT 0x1E84
+#define BGX_GMP_PCS_SGM_AN_ADV 0x30068
+#define BGX_GMP_PCS_MISCX_CTL 0x30078
+#define PCS_MISC_CTL_MODE BIT_ULL(8)
+#define PCS_MISC_CTL_DISP_EN BIT_ULL(13)
+#define PCS_MISC_CTL_GMX_ENO BIT_ULL(11)
+#define PCS_MISC_CTL_SAMP_PT_MASK 0x7Full
+#define BGX_GMP_GMI_PRTX_CFG 0x38020
+#define GMI_PORT_CFG_SPEED BIT_ULL(1)
+#define GMI_PORT_CFG_DUPLEX BIT_ULL(2)
+#define GMI_PORT_CFG_SLOT_TIME BIT_ULL(3)
+#define GMI_PORT_CFG_SPEED_MSB BIT_ULL(8)
+#define GMI_PORT_CFG_RX_IDLE BIT_ULL(12)
+#define GMI_PORT_CFG_TX_IDLE BIT_ULL(13)
+#define BGX_GMP_GMI_RXX_FRM_CTL 0x38028
+#define BGX_GMP_GMI_RXX_JABBER 0x38038
+#define BGX_GMP_GMI_TXX_THRESH 0x38210
+#define BGX_GMP_GMI_TXX_APPEND 0x38218
+#define BGX_GMP_GMI_TXX_SLOT 0x38220
+#define BGX_GMP_GMI_TXX_BURST 0x38228
+#define BGX_GMP_GMI_TXX_MIN_PKT 0x38240
+#define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300
+#define BGX_GMP_GMI_TXX_INT 0x38500
+#define BGX_GMP_GMI_TXX_INT_W1S 0x38508
+#define BGX_GMP_GMI_TXX_INT_ENA_W1C 0x38510
+#define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518
+#define GMI_TXX_INT_PTP_LOST BIT_ULL(4)
+#define GMI_TXX_INT_LATE_COL BIT_ULL(3)
+#define GMI_TXX_INT_XSDEF BIT_ULL(2)
+#define GMI_TXX_INT_XSCOL BIT_ULL(1)
+#define GMI_TXX_INT_UNDFLW BIT_ULL(0)
+
+#define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */
+#define BGX_MSIX_VEC_0_29_CTL 0x400008
+#define BGX_MSIX_PBA_0 0x4F0000
+
+/* MSI-X interrupts */
+#define BGX_MSIX_VECTORS 30
+#define BGX_LMAC_VEC_OFFSET 7
+#define BGX_MSIX_VEC_SHIFT 4
+
+#define CMRX_INT 0
+#define SPUX_INT 1
+#define SMUX_RX_INT 2
+#define SMUX_TX_INT 3
+#define GMPX_PCS_INT 4
+#define GMPX_GMI_RX_INT 5
+#define GMPX_GMI_TX_INT 6
+#define CMR_MEM_INT 28
+#define SPU_MEM_INT 29
+
+#define LMAC_INTR_LINK_UP BIT(0)
+#define LMAC_INTR_LINK_DOWN BIT(1)
+
+#define BGX_XCAST_BCAST_ACCEPT BIT(0)
+#define BGX_XCAST_MCAST_ACCEPT BIT(1)
+#define BGX_XCAST_MCAST_FILTER BIT(2)
+
+void bgx_set_dmac_cam_filter(int node, int bgx_idx, int lmacid, u64 mac, u8 vf);
+void bgx_reset_xcast_mode(int node, int bgx_idx, int lmacid, u8 vf);
+void bgx_set_xcast_mode(int node, int bgx_idx, int lmacid, u8 mode);
+void octeon_mdiobus_force_mod_depencency(void);
+void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable);
+void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);
+unsigned bgx_get_map(int node);
+int bgx_get_lmac_count(int node, int bgx);
+const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid);
+void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac);
+void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status);
+void bgx_lmac_internal_loopback(int node, int bgx_idx,
+ int lmac_idx, bool enable);
+void bgx_config_timestamping(int node, int bgx_idx, int lmacid, bool enable);
+void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause);
+void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause);
+
+void xcv_init_hw(void);
+void xcv_setup_link(bool link_up, int link_speed);
+
+u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx);
+u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx);
+#define BGX_RX_STATS_COUNT 11
+#define BGX_TX_STATS_COUNT 18
+
+struct bgx_stats {
+ u64 rx_stats[BGX_RX_STATS_COUNT];
+ u64 tx_stats[BGX_TX_STATS_COUNT];
+};
+
+enum LMAC_TYPE {
+ BGX_MODE_SGMII = 0, /* 1 lane, 1.250 Gbaud */
+ BGX_MODE_XAUI = 1, /* 4 lanes, 3.125 Gbaud */
+ BGX_MODE_DXAUI = 1, /* 4 lanes, 6.250 Gbaud */
+ BGX_MODE_RXAUI = 2, /* 2 lanes, 6.250 Gbaud */
+ BGX_MODE_XFI = 3, /* 1 lane, 10.3125 Gbaud */
+ BGX_MODE_XLAUI = 4, /* 4 lanes, 10.3125 Gbaud */
+ BGX_MODE_10G_KR = 3,/* 1 lane, 10.3125 Gbaud */
+ BGX_MODE_40G_KR = 4,/* 4 lanes, 10.3125 Gbaud */
+ BGX_MODE_RGMII = 5,
+ BGX_MODE_QSGMII = 6,
+ BGX_MODE_INVALID = 7,
+};
+
+#endif /* THUNDER_BGX_H */
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
new file mode 100644
index 000000000..2d5e8dab1
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+
+#include "nic.h"
+#include "thunder_bgx.h"
+
+#define DRV_NAME "thunder_xcv"
+#define DRV_VERSION "1.0"
+
+/* Register offsets */
+#define XCV_RESET 0x00
+#define PORT_EN BIT_ULL(63)
+#define CLK_RESET BIT_ULL(15)
+#define DLL_RESET BIT_ULL(11)
+#define COMP_EN BIT_ULL(7)
+#define TX_PKT_RESET BIT_ULL(3)
+#define TX_DATA_RESET BIT_ULL(2)
+#define RX_PKT_RESET BIT_ULL(1)
+#define RX_DATA_RESET BIT_ULL(0)
+#define XCV_DLL_CTL 0x10
+#define CLKRX_BYP BIT_ULL(23)
+#define CLKTX_BYP BIT_ULL(15)
+#define XCV_COMP_CTL 0x20
+#define DRV_BYP BIT_ULL(63)
+#define XCV_CTL 0x30
+#define XCV_INT 0x40
+#define XCV_INT_W1S 0x48
+#define XCV_INT_ENA_W1C 0x50
+#define XCV_INT_ENA_W1S 0x58
+#define XCV_INBND_STATUS 0x80
+#define XCV_BATCH_CRD_RET 0x100
+
+struct xcv {
+ void __iomem *reg_base;
+ struct pci_dev *pdev;
+};
+
+static struct xcv *xcv;
+
+/* Supported devices */
+static const struct pci_device_id xcv_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xA056) },
+ { 0, } /* end of table */
+};
+
+MODULE_AUTHOR("Cavium Inc");
+MODULE_DESCRIPTION("Cavium Thunder RGX/XCV Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, xcv_id_table);
+
+void xcv_init_hw(void)
+{
+ u64 cfg;
+
+ /* Take DLL out of reset */
+ cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+ cfg &= ~DLL_RESET;
+ writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+ /* Take clock tree out of reset */
+ cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+ cfg &= ~CLK_RESET;
+ writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+ /* Wait for DLL to lock */
+ msleep(1);
+
+ /* Configure DLL - enable or bypass
+ * TX no bypass, RX bypass
+ */
+ cfg = readq_relaxed(xcv->reg_base + XCV_DLL_CTL);
+ cfg &= ~0xFF03;
+ cfg |= CLKRX_BYP;
+ writeq_relaxed(cfg, xcv->reg_base + XCV_DLL_CTL);
+
+ /* Enable compensation controller and force the
+ * write to be visible to HW by readig back.
+ */
+ cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+ cfg |= COMP_EN;
+ writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+ readq_relaxed(xcv->reg_base + XCV_RESET);
+ /* Wait for compensation state machine to lock */
+ msleep(10);
+
+ /* enable the XCV block */
+ cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+ cfg |= PORT_EN;
+ writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+ cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+ cfg |= CLK_RESET;
+ writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+}
+EXPORT_SYMBOL(xcv_init_hw);
+
+void xcv_setup_link(bool link_up, int link_speed)
+{
+ u64 cfg;
+ int speed = 2;
+
+ if (!xcv) {
+ pr_err("XCV init not done, probe may have failed\n");
+ return;
+ }
+
+ if (link_speed == 100)
+ speed = 1;
+ else if (link_speed == 10)
+ speed = 0;
+
+ if (link_up) {
+ /* set operating speed */
+ cfg = readq_relaxed(xcv->reg_base + XCV_CTL);
+ cfg &= ~0x03;
+ cfg |= speed;
+ writeq_relaxed(cfg, xcv->reg_base + XCV_CTL);
+
+ /* Reset datapaths */
+ cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+ cfg |= TX_DATA_RESET | RX_DATA_RESET;
+ writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+ /* Enable the packet flow */
+ cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+ cfg |= TX_PKT_RESET | RX_PKT_RESET;
+ writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+ /* Return credits to RGX */
+ writeq_relaxed(0x01, xcv->reg_base + XCV_BATCH_CRD_RET);
+ } else {
+ /* Disable packet flow */
+ cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+ cfg &= ~(TX_PKT_RESET | RX_PKT_RESET);
+ writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+ readq_relaxed(xcv->reg_base + XCV_RESET);
+ }
+}
+EXPORT_SYMBOL(xcv_setup_link);
+
+static int xcv_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ int err;
+ struct device *dev = &pdev->dev;
+
+ xcv = devm_kzalloc(dev, sizeof(struct xcv), GFP_KERNEL);
+ if (!xcv)
+ return -ENOMEM;
+ xcv->pdev = pdev;
+
+ pci_set_drvdata(pdev, xcv);
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(dev, "Failed to enable PCI device\n");
+ goto err_kfree;
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ dev_err(dev, "PCI request regions failed 0x%x\n", err);
+ goto err_disable_device;
+ }
+
+ /* MAP configuration registers */
+ xcv->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+ if (!xcv->reg_base) {
+ dev_err(dev, "XCV: Cannot map CSR memory space, aborting\n");
+ err = -ENOMEM;
+ goto err_release_regions;
+ }
+
+ return 0;
+
+err_release_regions:
+ pci_release_regions(pdev);
+err_disable_device:
+ pci_disable_device(pdev);
+err_kfree:
+ devm_kfree(dev, xcv);
+ xcv = NULL;
+ return err;
+}
+
+static void xcv_remove(struct pci_dev *pdev)
+{
+ struct device *dev = &pdev->dev;
+
+ if (xcv) {
+ devm_kfree(dev, xcv);
+ xcv = NULL;
+ }
+
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+static struct pci_driver xcv_driver = {
+ .name = DRV_NAME,
+ .id_table = xcv_id_table,
+ .probe = xcv_probe,
+ .remove = xcv_remove,
+};
+
+static int __init xcv_init_module(void)
+{
+ pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
+
+ return pci_register_driver(&xcv_driver);
+}
+
+static void __exit xcv_cleanup_module(void)
+{
+ pci_unregister_driver(&xcv_driver);
+}
+
+module_init(xcv_init_module);
+module_exit(xcv_cleanup_module);