diff options
Diffstat (limited to 'drivers/net/ethernet/cavium/thunder/nicvf_queues.c')
-rw-r--r-- | drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 1976 |
1 files changed, 1976 insertions, 0 deletions
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c new file mode 100644 index 000000000..d9bcbe469 --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -0,0 +1,1976 @@ +/* + * Copyright (C) 2015 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/ip.h> +#include <linux/etherdevice.h> +#include <linux/iommu.h> +#include <net/ip.h> +#include <net/tso.h> + +#include "nic_reg.h" +#include "nic.h" +#include "q_struct.h" +#include "nicvf_queues.h" + +static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry, + int size, u64 data); +static void nicvf_get_page(struct nicvf *nic) +{ + if (!nic->rb_pageref || !nic->rb_page) + return; + + page_ref_add(nic->rb_page, nic->rb_pageref); + nic->rb_pageref = 0; +} + +/* Poll a register for a specific value */ +static int nicvf_poll_reg(struct nicvf *nic, int qidx, + u64 reg, int bit_pos, int bits, int val) +{ + u64 bit_mask; + u64 reg_val; + int timeout = 10; + + bit_mask = (1ULL << bits) - 1; + bit_mask = (bit_mask << bit_pos); + + while (timeout) { + reg_val = nicvf_queue_reg_read(nic, reg, qidx); + if (((reg_val & bit_mask) >> bit_pos) == val) + return 0; + usleep_range(1000, 2000); + timeout--; + } + netdev_err(nic->netdev, "Poll on reg 0x%llx failed\n", reg); + return 1; +} + +/* Allocate memory for a queue's descriptors */ +static int nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem, + int q_len, int desc_size, int align_bytes) +{ + dmem->q_len = q_len; + dmem->size = (desc_size * q_len) + align_bytes; + /* Save address, need it while freeing */ + dmem->unalign_base = dma_zalloc_coherent(&nic->pdev->dev, dmem->size, + &dmem->dma, GFP_KERNEL); + if (!dmem->unalign_base) + return -ENOMEM; + + /* Align memory address for 'align_bytes' */ + dmem->phys_base = NICVF_ALIGNED_ADDR((u64)dmem->dma, align_bytes); + dmem->base = dmem->unalign_base + (dmem->phys_base - dmem->dma); + return 0; +} + +/* Free queue's descriptor memory */ +static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) +{ + if (!dmem) + return; + + dma_free_coherent(&nic->pdev->dev, dmem->size, + dmem->unalign_base, dmem->dma); + dmem->unalign_base = NULL; + dmem->base = NULL; +} + +#define XDP_PAGE_REFCNT_REFILL 256 + +/* Allocate a new page or recycle one if possible + * + * We cannot optimize dma mapping here, since + * 1. It's only one RBDR ring for 8 Rx queues. + * 2. CQE_RX gives address of the buffer where pkt has been DMA'ed + * and not idx into RBDR ring, so can't refer to saved info. + * 3. There are multiple receive buffers per page + */ +static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic, + struct rbdr *rbdr, gfp_t gfp) +{ + int ref_count; + struct page *page = NULL; + struct pgcache *pgcache, *next; + + /* Check if page is already allocated */ + pgcache = &rbdr->pgcache[rbdr->pgidx]; + page = pgcache->page; + /* Check if page can be recycled */ + if (page) { + ref_count = page_ref_count(page); + /* This page can be recycled if internal ref_count and page's + * ref_count are equal, indicating that the page has been used + * once for packet transmission. For non-XDP mode, internal + * ref_count is always '1'. + */ + if (rbdr->is_xdp) { + if (ref_count == pgcache->ref_count) + pgcache->ref_count--; + else + page = NULL; + } else if (ref_count != 1) { + page = NULL; + } + } + + if (!page) { + page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0); + if (!page) + return NULL; + + this_cpu_inc(nic->pnicvf->drv_stats->page_alloc); + + /* Check for space */ + if (rbdr->pgalloc >= rbdr->pgcnt) { + /* Page can still be used */ + nic->rb_page = page; + return NULL; + } + + /* Save the page in page cache */ + pgcache->page = page; + pgcache->dma_addr = 0; + pgcache->ref_count = 0; + rbdr->pgalloc++; + } + + /* Take additional page references for recycling */ + if (rbdr->is_xdp) { + /* Since there is single RBDR (i.e single core doing + * page recycling) per 8 Rx queues, in XDP mode adjusting + * page references atomically is the biggest bottleneck, so + * take bunch of references at a time. + * + * So here, below reference counts defer by '1'. + */ + if (!pgcache->ref_count) { + pgcache->ref_count = XDP_PAGE_REFCNT_REFILL; + page_ref_add(page, XDP_PAGE_REFCNT_REFILL); + } + } else { + /* In non-XDP case, single 64K page is divided across multiple + * receive buffers, so cost of recycling is less anyway. + * So we can do with just one extra reference. + */ + page_ref_add(page, 1); + } + + rbdr->pgidx++; + rbdr->pgidx &= (rbdr->pgcnt - 1); + + /* Prefetch refcount of next page in page cache */ + next = &rbdr->pgcache[rbdr->pgidx]; + page = next->page; + if (page) + prefetch(&page->_refcount); + + return pgcache; +} + +/* Allocate buffer for packet reception */ +static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr, + gfp_t gfp, u32 buf_len, u64 *rbuf) +{ + struct pgcache *pgcache = NULL; + + /* Check if request can be accomodated in previous allocated page. + * But in XDP mode only one buffer per page is permitted. + */ + if (!rbdr->is_xdp && nic->rb_page && + ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) { + nic->rb_pageref++; + goto ret; + } + + nicvf_get_page(nic); + nic->rb_page = NULL; + + /* Get new page, either recycled or new one */ + pgcache = nicvf_alloc_page(nic, rbdr, gfp); + if (!pgcache && !nic->rb_page) { + this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures); + return -ENOMEM; + } + + nic->rb_page_offset = 0; + + /* Reserve space for header modifications by BPF program */ + if (rbdr->is_xdp) + buf_len += XDP_PACKET_HEADROOM; + + /* Check if it's recycled */ + if (pgcache) + nic->rb_page = pgcache->page; +ret: + if (rbdr->is_xdp && pgcache && pgcache->dma_addr) { + *rbuf = pgcache->dma_addr; + } else { + /* HW will ensure data coherency, CPU sync not required */ + *rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, + nic->rb_page_offset, buf_len, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) { + if (!nic->rb_page_offset) + __free_pages(nic->rb_page, 0); + nic->rb_page = NULL; + return -ENOMEM; + } + if (pgcache) + pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM; + nic->rb_page_offset += buf_len; + } + + return 0; +} + +/* Build skb around receive buffer */ +static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic, + u64 rb_ptr, int len) +{ + void *data; + struct sk_buff *skb; + + data = phys_to_virt(rb_ptr); + + /* Now build an skb to give to stack */ + skb = build_skb(data, RCV_FRAG_LEN); + if (!skb) { + put_page(virt_to_page(data)); + return NULL; + } + + prefetch(skb->data); + return skb; +} + +/* Allocate RBDR ring and populate receive buffers */ +static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, + int ring_len, int buf_size) +{ + int idx; + u64 rbuf; + struct rbdr_entry_t *desc; + int err; + + err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len, + sizeof(struct rbdr_entry_t), + NICVF_RCV_BUF_ALIGN_BYTES); + if (err) + return err; + + rbdr->desc = rbdr->dmem.base; + /* Buffer size has to be in multiples of 128 bytes */ + rbdr->dma_size = buf_size; + rbdr->enable = true; + rbdr->thresh = RBDR_THRESH; + rbdr->head = 0; + rbdr->tail = 0; + + /* Initialize page recycling stuff. + * + * Can't use single buffer per page especially with 64K pages. + * On embedded platforms i.e 81xx/83xx available memory itself + * is low and minimum ring size of RBDR is 8K, that takes away + * lots of memory. + * + * But for XDP it has to be a single buffer per page. + */ + if (!nic->pnicvf->xdp_prog) { + rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size); + rbdr->is_xdp = false; + } else { + rbdr->pgcnt = ring_len; + rbdr->is_xdp = true; + } + rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt); + rbdr->pgcache = kcalloc(rbdr->pgcnt, sizeof(*rbdr->pgcache), + GFP_KERNEL); + if (!rbdr->pgcache) + return -ENOMEM; + rbdr->pgidx = 0; + rbdr->pgalloc = 0; + + nic->rb_page = NULL; + for (idx = 0; idx < ring_len; idx++) { + err = nicvf_alloc_rcv_buffer(nic, rbdr, GFP_KERNEL, + RCV_FRAG_LEN, &rbuf); + if (err) { + /* To free already allocated and mapped ones */ + rbdr->tail = idx - 1; + return err; + } + + desc = GET_RBDR_DESC(rbdr, idx); + desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); + } + + nicvf_get_page(nic); + + return 0; +} + +/* Free RBDR ring and its receive buffers */ +static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) +{ + int head, tail; + u64 buf_addr, phys_addr; + struct pgcache *pgcache; + struct rbdr_entry_t *desc; + + if (!rbdr) + return; + + rbdr->enable = false; + if (!rbdr->dmem.base) + return; + + head = rbdr->head; + tail = rbdr->tail; + + /* Release page references */ + while (head != tail) { + desc = GET_RBDR_DESC(rbdr, head); + buf_addr = desc->buf_addr; + phys_addr = nicvf_iova_to_phys(nic, buf_addr); + dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (phys_addr) + put_page(virt_to_page(phys_to_virt(phys_addr))); + head++; + head &= (rbdr->dmem.q_len - 1); + } + /* Release buffer of tail desc */ + desc = GET_RBDR_DESC(rbdr, tail); + buf_addr = desc->buf_addr; + phys_addr = nicvf_iova_to_phys(nic, buf_addr); + dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (phys_addr) + put_page(virt_to_page(phys_to_virt(phys_addr))); + + /* Sync page cache info */ + smp_rmb(); + + /* Release additional page references held for recycling */ + head = 0; + while (head < rbdr->pgcnt) { + pgcache = &rbdr->pgcache[head]; + if (pgcache->page && page_ref_count(pgcache->page) != 0) { + if (rbdr->is_xdp) { + page_ref_sub(pgcache->page, + pgcache->ref_count - 1); + } + put_page(pgcache->page); + } + head++; + } + + /* Free RBDR ring */ + nicvf_free_q_desc_mem(nic, &rbdr->dmem); +} + +/* Refill receive buffer descriptors with new buffers. + */ +static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp) +{ + struct queue_set *qs = nic->qs; + int rbdr_idx = qs->rbdr_cnt; + int tail, qcount; + int refill_rb_cnt; + struct rbdr *rbdr; + struct rbdr_entry_t *desc; + u64 rbuf; + int new_rb = 0; + +refill: + if (!rbdr_idx) + return; + rbdr_idx--; + rbdr = &qs->rbdr[rbdr_idx]; + /* Check if it's enabled */ + if (!rbdr->enable) + goto next_rbdr; + + /* Get no of desc's to be refilled */ + qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx); + qcount &= 0x7FFFF; + /* Doorbell can be ringed with a max of ring size minus 1 */ + if (qcount >= (qs->rbdr_len - 1)) + goto next_rbdr; + else + refill_rb_cnt = qs->rbdr_len - qcount - 1; + + /* Sync page cache info */ + smp_rmb(); + + /* Start filling descs from tail */ + tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3; + while (refill_rb_cnt) { + tail++; + tail &= (rbdr->dmem.q_len - 1); + + if (nicvf_alloc_rcv_buffer(nic, rbdr, gfp, RCV_FRAG_LEN, &rbuf)) + break; + + desc = GET_RBDR_DESC(rbdr, tail); + desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); + refill_rb_cnt--; + new_rb++; + } + + nicvf_get_page(nic); + + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + + /* Check if buffer allocation failed */ + if (refill_rb_cnt) + nic->rb_alloc_fail = true; + else + nic->rb_alloc_fail = false; + + /* Notify HW */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, + rbdr_idx, new_rb); +next_rbdr: + /* Re-enable RBDR interrupts only if buffer allocation is success */ + if (!nic->rb_alloc_fail && rbdr->enable && + netif_running(nic->pnicvf->netdev)) + nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx); + + if (rbdr_idx) + goto refill; +} + +/* Alloc rcv buffers in non-atomic mode for better success */ +void nicvf_rbdr_work(struct work_struct *work) +{ + struct nicvf *nic = container_of(work, struct nicvf, rbdr_work.work); + + nicvf_refill_rbdr(nic, GFP_KERNEL); + if (nic->rb_alloc_fail) + schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10)); + else + nic->rb_work_scheduled = false; +} + +/* In Softirq context, alloc rcv buffers in atomic mode */ +void nicvf_rbdr_task(unsigned long data) +{ + struct nicvf *nic = (struct nicvf *)data; + + nicvf_refill_rbdr(nic, GFP_ATOMIC); + if (nic->rb_alloc_fail) { + nic->rb_work_scheduled = true; + schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10)); + } +} + +/* Initialize completion queue */ +static int nicvf_init_cmp_queue(struct nicvf *nic, + struct cmp_queue *cq, int q_len) +{ + int err; + + err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE, + NICVF_CQ_BASE_ALIGN_BYTES); + if (err) + return err; + + cq->desc = cq->dmem.base; + cq->thresh = pass1_silicon(nic->pdev) ? 0 : CMP_QUEUE_CQE_THRESH; + nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1; + + return 0; +} + +static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq) +{ + if (!cq) + return; + if (!cq->dmem.base) + return; + + nicvf_free_q_desc_mem(nic, &cq->dmem); +} + +/* Initialize transmit queue */ +static int nicvf_init_snd_queue(struct nicvf *nic, + struct snd_queue *sq, int q_len, int qidx) +{ + int err; + + err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE, + NICVF_SQ_BASE_ALIGN_BYTES); + if (err) + return err; + + sq->desc = sq->dmem.base; + sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL); + if (!sq->skbuff) + return -ENOMEM; + + sq->head = 0; + sq->tail = 0; + sq->thresh = SND_QUEUE_THRESH; + + /* Check if this SQ is a XDP TX queue */ + if (nic->sqs_mode) + qidx += ((nic->sqs_id + 1) * MAX_SND_QUEUES_PER_QS); + if (qidx < nic->pnicvf->xdp_tx_queues) { + /* Alloc memory to save page pointers for XDP_TX */ + sq->xdp_page = kcalloc(q_len, sizeof(u64), GFP_KERNEL); + if (!sq->xdp_page) + return -ENOMEM; + sq->xdp_desc_cnt = 0; + sq->xdp_free_cnt = q_len - 1; + sq->is_xdp = true; + } else { + sq->xdp_page = NULL; + sq->xdp_desc_cnt = 0; + sq->xdp_free_cnt = 0; + sq->is_xdp = false; + + atomic_set(&sq->free_cnt, q_len - 1); + + /* Preallocate memory for TSO segment's header */ + sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev, + q_len * TSO_HEADER_SIZE, + &sq->tso_hdrs_phys, + GFP_KERNEL); + if (!sq->tso_hdrs) + return -ENOMEM; + } + + return 0; +} + +void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq, + int hdr_sqe, u8 subdesc_cnt) +{ + u8 idx; + struct sq_gather_subdesc *gather; + + /* Unmap DMA mapped skb data buffers */ + for (idx = 0; idx < subdesc_cnt; idx++) { + hdr_sqe++; + hdr_sqe &= (sq->dmem.q_len - 1); + gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe); + /* HW will ensure data coherency, CPU sync not required */ + dma_unmap_page_attrs(&nic->pdev->dev, gather->addr, + gather->size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + } +} + +static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) +{ + struct sk_buff *skb; + struct page *page; + struct sq_hdr_subdesc *hdr; + struct sq_hdr_subdesc *tso_sqe; + + if (!sq) + return; + if (!sq->dmem.base) + return; + + if (sq->tso_hdrs) { + dma_free_coherent(&nic->pdev->dev, + sq->dmem.q_len * TSO_HEADER_SIZE, + sq->tso_hdrs, sq->tso_hdrs_phys); + sq->tso_hdrs = NULL; + } + + /* Free pending skbs in the queue */ + smp_rmb(); + while (sq->head != sq->tail) { + skb = (struct sk_buff *)sq->skbuff[sq->head]; + if (!skb || !sq->xdp_page) + goto next; + + page = (struct page *)sq->xdp_page[sq->head]; + if (!page) + goto next; + else + put_page(page); + + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head); + /* Check for dummy descriptor used for HW TSO offload on 88xx */ + if (hdr->dont_send) { + /* Get actual TSO descriptors and unmap them */ + tso_sqe = + (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); + nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, + tso_sqe->subdesc_cnt); + } else { + nicvf_unmap_sndq_buffers(nic, sq, sq->head, + hdr->subdesc_cnt); + } + if (skb) + dev_kfree_skb_any(skb); +next: + sq->head++; + sq->head &= (sq->dmem.q_len - 1); + } + kfree(sq->skbuff); + kfree(sq->xdp_page); + nicvf_free_q_desc_mem(nic, &sq->dmem); +} + +static void nicvf_reclaim_snd_queue(struct nicvf *nic, + struct queue_set *qs, int qidx) +{ + /* Disable send queue */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0); + /* Check if SQ is stopped */ + if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01)) + return; + /* Reset send queue */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET); +} + +static void nicvf_reclaim_rcv_queue(struct nicvf *nic, + struct queue_set *qs, int qidx) +{ + union nic_mbx mbx = {}; + + /* Make sure all packets in the pipeline are written back into mem */ + mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC; + nicvf_send_msg_to_pf(nic, &mbx); +} + +static void nicvf_reclaim_cmp_queue(struct nicvf *nic, + struct queue_set *qs, int qidx) +{ + /* Disable timer threshold (doesn't get reset upon CQ reset */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0); + /* Disable completion queue */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0); + /* Reset completion queue */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET); +} + +static void nicvf_reclaim_rbdr(struct nicvf *nic, + struct rbdr *rbdr, int qidx) +{ + u64 tmp, fifo_state; + int timeout = 10; + + /* Save head and tail pointers for feeing up buffers */ + rbdr->head = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_HEAD, + qidx) >> 3; + rbdr->tail = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_TAIL, + qidx) >> 3; + + /* If RBDR FIFO is in 'FAIL' state then do a reset first + * before relaiming. + */ + fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx); + if (((fifo_state >> 62) & 0x03) == 0x3) + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, + qidx, NICVF_RBDR_RESET); + + /* Disable RBDR */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0); + if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00)) + return; + while (1) { + tmp = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_PREFETCH_STATUS, + qidx); + if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF)) + break; + usleep_range(1000, 2000); + timeout--; + if (!timeout) { + netdev_err(nic->netdev, + "Failed polling on prefetch status\n"); + return; + } + } + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, + qidx, NICVF_RBDR_RESET); + + if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02)) + return; + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00); + if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00)) + return; +} + +void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features) +{ + u64 rq_cfg; + int sqs; + + rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0); + + /* Enable first VLAN stripping */ + if (features & NETIF_F_HW_VLAN_CTAG_RX) + rq_cfg |= (1ULL << 25); + else + rq_cfg &= ~(1ULL << 25); + nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, rq_cfg); + + /* Configure Secondary Qsets, if any */ + for (sqs = 0; sqs < nic->sqs_count; sqs++) + if (nic->snicvf[sqs]) + nicvf_queue_reg_write(nic->snicvf[sqs], + NIC_QSET_RQ_GEN_CFG, 0, rq_cfg); +} + +static void nicvf_reset_rcv_queue_stats(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + /* Reset all RQ/SQ and VF stats */ + mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER; + mbx.reset_stat.rx_stat_mask = 0x3FFF; + mbx.reset_stat.tx_stat_mask = 0x1F; + mbx.reset_stat.rq_stat_mask = 0xFFFF; + mbx.reset_stat.sq_stat_mask = 0xFFFF; + nicvf_send_msg_to_pf(nic, &mbx); +} + +/* Configures receive queue */ +static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + union nic_mbx mbx = {}; + struct rcv_queue *rq; + struct rq_cfg rq_cfg; + + rq = &qs->rq[qidx]; + rq->enable = enable; + + /* Disable receive queue */ + nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0); + + if (!rq->enable) { + nicvf_reclaim_rcv_queue(nic, qs, qidx); + xdp_rxq_info_unreg(&rq->xdp_rxq); + return; + } + + rq->cq_qs = qs->vnic_id; + rq->cq_idx = qidx; + rq->start_rbdr_qs = qs->vnic_id; + rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1; + rq->cont_rbdr_qs = qs->vnic_id; + rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1; + /* all writes of RBDR data to be loaded into L2 Cache as well*/ + rq->caching = 1; + + /* Driver have no proper error path for failed XDP RX-queue info reg */ + WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx) < 0); + + /* Send a mailbox msg to PF to config RQ */ + mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG; + mbx.rq.qs_num = qs->vnic_id; + mbx.rq.rq_num = qidx; + mbx.rq.cfg = ((u64)rq->caching << 26) | (rq->cq_qs << 19) | + (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) | + (rq->cont_qs_rbdr_idx << 8) | + (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx); + nicvf_send_msg_to_pf(nic, &mbx); + + mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG; + mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) | + (RQ_PASS_RBDR_LVL << 16) | (RQ_PASS_CQ_LVL << 8) | + (qs->vnic_id << 0); + nicvf_send_msg_to_pf(nic, &mbx); + + /* RQ drop config + * Enable CQ drop to reserve sufficient CQEs for all tx packets + */ + mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG; + mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) | + (RQ_PASS_RBDR_LVL << 40) | (RQ_DROP_RBDR_LVL << 32) | + (RQ_PASS_CQ_LVL << 16) | (RQ_DROP_CQ_LVL << 8); + nicvf_send_msg_to_pf(nic, &mbx); + + if (!nic->sqs_mode && (qidx == 0)) { + /* Enable checking L3/L4 length and TCP/UDP checksums + * Also allow IPv6 pkts with zero UDP checksum. + */ + nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, + (BIT(24) | BIT(23) | BIT(21) | BIT(20))); + nicvf_config_vlan_stripping(nic, nic->netdev->features); + } + + /* Enable Receive queue */ + memset(&rq_cfg, 0, sizeof(struct rq_cfg)); + rq_cfg.ena = 1; + rq_cfg.tcp_ena = 0; + nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg); +} + +/* Configures completion queue */ +void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + struct cmp_queue *cq; + struct cq_cfg cq_cfg; + + cq = &qs->cq[qidx]; + cq->enable = enable; + + if (!cq->enable) { + nicvf_reclaim_cmp_queue(nic, qs, qidx); + return; + } + + /* Reset completion queue */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET); + + if (!cq->enable) + return; + + spin_lock_init(&cq->lock); + /* Set completion queue base address */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE, + qidx, (u64)(cq->dmem.phys_base)); + + /* Enable Completion queue */ + memset(&cq_cfg, 0, sizeof(struct cq_cfg)); + cq_cfg.ena = 1; + cq_cfg.reset = 0; + cq_cfg.caching = 0; + cq_cfg.qsize = ilog2(qs->cq_len >> 10); + cq_cfg.avg_con = 0; + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(u64 *)&cq_cfg); + + /* Set threshold value for interrupt generation */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh); + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, + qidx, CMP_QUEUE_TIMER_THRESH); +} + +/* Configures transmit queue */ +static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + union nic_mbx mbx = {}; + struct snd_queue *sq; + struct sq_cfg sq_cfg; + + sq = &qs->sq[qidx]; + sq->enable = enable; + + if (!sq->enable) { + nicvf_reclaim_snd_queue(nic, qs, qidx); + return; + } + + /* Reset send queue */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET); + + sq->cq_qs = qs->vnic_id; + sq->cq_idx = qidx; + + /* Send a mailbox msg to PF to config SQ */ + mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG; + mbx.sq.qs_num = qs->vnic_id; + mbx.sq.sq_num = qidx; + mbx.sq.sqs_mode = nic->sqs_mode; + mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx; + nicvf_send_msg_to_pf(nic, &mbx); + + /* Set queue base address */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE, + qidx, (u64)(sq->dmem.phys_base)); + + /* Enable send queue & set queue size */ + memset(&sq_cfg, 0, sizeof(struct sq_cfg)); + sq_cfg.ena = 1; + sq_cfg.reset = 0; + sq_cfg.ldwb = 0; + sq_cfg.qsize = ilog2(qs->sq_len >> 10); + sq_cfg.tstmp_bgx_intf = 0; + /* CQ's level at which HW will stop processing SQEs to avoid + * transmitting a pkt with no space in CQ to post CQE_TX. + */ + sq_cfg.cq_limit = (CMP_QUEUE_PIPELINE_RSVD * 256) / qs->cq_len; + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg); + + /* Set threshold value for interrupt generation */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh); + + /* Set queue:cpu affinity for better load distribution */ + if (cpu_online(qidx)) { + cpumask_set_cpu(qidx, &sq->affinity_mask); + netif_set_xps_queue(nic->netdev, + &sq->affinity_mask, qidx); + } +} + +/* Configures receive buffer descriptor ring */ +static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + struct rbdr *rbdr; + struct rbdr_cfg rbdr_cfg; + + rbdr = &qs->rbdr[qidx]; + nicvf_reclaim_rbdr(nic, rbdr, qidx); + if (!enable) + return; + + /* Set descriptor base address */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE, + qidx, (u64)(rbdr->dmem.phys_base)); + + /* Enable RBDR & set queue size */ + /* Buffer size should be in multiples of 128 bytes */ + memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg)); + rbdr_cfg.ena = 1; + rbdr_cfg.reset = 0; + rbdr_cfg.ldwb = 0; + rbdr_cfg.qsize = RBDR_SIZE; + rbdr_cfg.avg_con = 0; + rbdr_cfg.lines = rbdr->dma_size / 128; + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, + qidx, *(u64 *)&rbdr_cfg); + + /* Notify HW */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, + qidx, qs->rbdr_len - 1); + + /* Set threshold value for interrupt generation */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH, + qidx, rbdr->thresh - 1); +} + +/* Requests PF to assign and enable Qset */ +void nicvf_qset_config(struct nicvf *nic, bool enable) +{ + union nic_mbx mbx = {}; + struct queue_set *qs = nic->qs; + struct qs_cfg *qs_cfg; + + if (!qs) { + netdev_warn(nic->netdev, + "Qset is still not allocated, don't init queues\n"); + return; + } + + qs->enable = enable; + qs->vnic_id = nic->vf_id; + + /* Send a mailbox msg to PF to config Qset */ + mbx.qs.msg = NIC_MBOX_MSG_QS_CFG; + mbx.qs.num = qs->vnic_id; + mbx.qs.sqs_count = nic->sqs_count; + + mbx.qs.cfg = 0; + qs_cfg = (struct qs_cfg *)&mbx.qs.cfg; + if (qs->enable) { + qs_cfg->ena = 1; +#ifdef __BIG_ENDIAN + qs_cfg->be = 1; +#endif + qs_cfg->vnic = qs->vnic_id; + /* Enable Tx timestamping capability */ + if (nic->ptp_clock) + qs_cfg->send_tstmp_ena = 1; + } + nicvf_send_msg_to_pf(nic, &mbx); +} + +static void nicvf_free_resources(struct nicvf *nic) +{ + int qidx; + struct queue_set *qs = nic->qs; + + /* Free receive buffer descriptor ring */ + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) + nicvf_free_rbdr(nic, &qs->rbdr[qidx]); + + /* Free completion queue */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) + nicvf_free_cmp_queue(nic, &qs->cq[qidx]); + + /* Free send queue */ + for (qidx = 0; qidx < qs->sq_cnt; qidx++) + nicvf_free_snd_queue(nic, &qs->sq[qidx]); +} + +static int nicvf_alloc_resources(struct nicvf *nic) +{ + int qidx; + struct queue_set *qs = nic->qs; + + /* Alloc receive buffer descriptor ring */ + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) { + if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len, + DMA_BUFFER_LEN)) + goto alloc_fail; + } + + /* Alloc send queue */ + for (qidx = 0; qidx < qs->sq_cnt; qidx++) { + if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx)) + goto alloc_fail; + } + + /* Alloc completion queue */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) { + if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len)) + goto alloc_fail; + } + + return 0; +alloc_fail: + nicvf_free_resources(nic); + return -ENOMEM; +} + +int nicvf_set_qset_resources(struct nicvf *nic) +{ + struct queue_set *qs; + + qs = devm_kzalloc(&nic->pdev->dev, sizeof(*qs), GFP_KERNEL); + if (!qs) + return -ENOMEM; + nic->qs = qs; + + /* Set count of each queue */ + qs->rbdr_cnt = DEFAULT_RBDR_CNT; + qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus()); + qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus()); + qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt); + + /* Set queue lengths */ + qs->rbdr_len = RCV_BUF_COUNT; + qs->sq_len = SND_QUEUE_LEN; + qs->cq_len = CMP_QUEUE_LEN; + + nic->rx_queues = qs->rq_cnt; + nic->tx_queues = qs->sq_cnt; + nic->xdp_tx_queues = 0; + + return 0; +} + +int nicvf_config_data_transfer(struct nicvf *nic, bool enable) +{ + bool disable = false; + struct queue_set *qs = nic->qs; + struct queue_set *pqs = nic->pnicvf->qs; + int qidx; + + if (!qs) + return 0; + + /* Take primary VF's queue lengths. + * This is needed to take queue lengths set from ethtool + * into consideration. + */ + if (nic->sqs_mode && pqs) { + qs->cq_len = pqs->cq_len; + qs->sq_len = pqs->sq_len; + } + + if (enable) { + if (nicvf_alloc_resources(nic)) + return -ENOMEM; + + for (qidx = 0; qidx < qs->sq_cnt; qidx++) + nicvf_snd_queue_config(nic, qs, qidx, enable); + for (qidx = 0; qidx < qs->cq_cnt; qidx++) + nicvf_cmp_queue_config(nic, qs, qidx, enable); + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) + nicvf_rbdr_config(nic, qs, qidx, enable); + for (qidx = 0; qidx < qs->rq_cnt; qidx++) + nicvf_rcv_queue_config(nic, qs, qidx, enable); + } else { + for (qidx = 0; qidx < qs->rq_cnt; qidx++) + nicvf_rcv_queue_config(nic, qs, qidx, disable); + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) + nicvf_rbdr_config(nic, qs, qidx, disable); + for (qidx = 0; qidx < qs->sq_cnt; qidx++) + nicvf_snd_queue_config(nic, qs, qidx, disable); + for (qidx = 0; qidx < qs->cq_cnt; qidx++) + nicvf_cmp_queue_config(nic, qs, qidx, disable); + + nicvf_free_resources(nic); + } + + /* Reset RXQ's stats. + * SQ's stats will get reset automatically once SQ is reset. + */ + nicvf_reset_rcv_queue_stats(nic); + + return 0; +} + +/* Get a free desc from SQ + * returns descriptor ponter & descriptor number + */ +static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt) +{ + int qentry; + + qentry = sq->tail; + if (!sq->is_xdp) + atomic_sub(desc_cnt, &sq->free_cnt); + else + sq->xdp_free_cnt -= desc_cnt; + sq->tail += desc_cnt; + sq->tail &= (sq->dmem.q_len - 1); + + return qentry; +} + +/* Rollback to previous tail pointer when descriptors not used */ +static inline void nicvf_rollback_sq_desc(struct snd_queue *sq, + int qentry, int desc_cnt) +{ + sq->tail = qentry; + atomic_add(desc_cnt, &sq->free_cnt); +} + +/* Free descriptor back to SQ for future use */ +void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt) +{ + if (!sq->is_xdp) + atomic_add(desc_cnt, &sq->free_cnt); + else + sq->xdp_free_cnt += desc_cnt; + sq->head += desc_cnt; + sq->head &= (sq->dmem.q_len - 1); +} + +static inline int nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry) +{ + qentry++; + qentry &= (sq->dmem.q_len - 1); + return qentry; +} + +void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx) +{ + u64 sq_cfg; + + sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx); + sq_cfg |= NICVF_SQ_EN; + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg); + /* Ring doorbell so that H/W restarts processing SQEs */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0); +} + +void nicvf_sq_disable(struct nicvf *nic, int qidx) +{ + u64 sq_cfg; + + sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx); + sq_cfg &= ~NICVF_SQ_EN; + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg); +} + +void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, + int qidx) +{ + u64 head, tail; + struct sk_buff *skb; + struct nicvf *nic = netdev_priv(netdev); + struct sq_hdr_subdesc *hdr; + + head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4; + tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4; + while (sq->head != head) { + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head); + if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) { + nicvf_put_sq_desc(sq, 1); + continue; + } + skb = (struct sk_buff *)sq->skbuff[sq->head]; + if (skb) + dev_kfree_skb_any(skb); + atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets); + atomic64_add(hdr->tot_len, + (atomic64_t *)&netdev->stats.tx_bytes); + nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); + } +} + +/* XDP Transmit APIs */ +void nicvf_xdp_sq_doorbell(struct nicvf *nic, + struct snd_queue *sq, int sq_num) +{ + if (!sq->xdp_desc_cnt) + return; + + /* make sure all memory stores are done before ringing doorbell */ + wmb(); + + /* Inform HW to xmit all TSO segments */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + sq_num, sq->xdp_desc_cnt); + sq->xdp_desc_cnt = 0; +} + +static inline void +nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry, + int subdesc_cnt, u64 data, int len) +{ + struct sq_hdr_subdesc *hdr; + + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); + memset(hdr, 0, SND_QUEUE_DESC_SIZE); + hdr->subdesc_type = SQ_DESC_TYPE_HEADER; + hdr->subdesc_cnt = subdesc_cnt; + hdr->tot_len = len; + hdr->post_cqe = 1; + sq->xdp_page[qentry] = (u64)virt_to_page((void *)data); +} + +int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq, + u64 bufaddr, u64 dma_addr, u16 len) +{ + int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT; + int qentry; + + if (subdesc_cnt > sq->xdp_free_cnt) + return 0; + + qentry = nicvf_get_sq_desc(sq, subdesc_cnt); + + nicvf_xdp_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, bufaddr, len); + + qentry = nicvf_get_nxt_sqentry(sq, qentry); + nicvf_sq_add_gather_subdesc(sq, qentry, len, dma_addr); + + sq->xdp_desc_cnt += subdesc_cnt; + + return 1; +} + +/* Calculate no of SQ subdescriptors needed to transmit all + * segments of this TSO packet. + * Taken from 'Tilera network driver' with a minor modification. + */ +static int nicvf_tso_count_subdescs(struct sk_buff *skb) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + unsigned int data_len = skb->len - sh_len; + unsigned int p_len = sh->gso_size; + long f_id = -1; /* id of the current fragment */ + long f_size = skb_headlen(skb) - sh_len; /* current fragment size */ + long f_used = 0; /* bytes used from the current fragment */ + long n; /* size of the current piece of payload */ + int num_edescs = 0; + int segment; + + for (segment = 0; segment < sh->gso_segs; segment++) { + unsigned int p_used = 0; + + /* One edesc for header and for each piece of the payload. */ + for (num_edescs++; p_used < p_len; num_edescs++) { + /* Advance as needed. */ + while (f_used >= f_size) { + f_id++; + f_size = skb_frag_size(&sh->frags[f_id]); + f_used = 0; + } + + /* Use bytes from the current fragment. */ + n = p_len - p_used; + if (n > f_size - f_used) + n = f_size - f_used; + f_used += n; + p_used += n; + } + + /* The last segment may be less than gso_size. */ + data_len -= p_len; + if (data_len < p_len) + p_len = data_len; + } + + /* '+ gso_segs' for SQ_HDR_SUDESCs for each segment */ + return num_edescs + sh->gso_segs; +} + +#define POST_CQE_DESC_COUNT 2 + +/* Get the number of SQ descriptors needed to xmit this skb */ +static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb) +{ + int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT; + + if (skb_shinfo(skb)->gso_size && !nic->hw_tso) { + subdesc_cnt = nicvf_tso_count_subdescs(skb); + return subdesc_cnt; + } + + /* Dummy descriptors to get TSO pkt completion notification */ + if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) + subdesc_cnt += POST_CQE_DESC_COUNT; + + if (skb_shinfo(skb)->nr_frags) + subdesc_cnt += skb_shinfo(skb)->nr_frags; + + return subdesc_cnt; +} + +/* Add SQ HEADER subdescriptor. + * First subdescriptor for every send descriptor. + */ +static inline void +nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, + int subdesc_cnt, struct sk_buff *skb, int len) +{ + int proto; + struct sq_hdr_subdesc *hdr; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + + ip.hdr = skb_network_header(skb); + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); + memset(hdr, 0, SND_QUEUE_DESC_SIZE); + hdr->subdesc_type = SQ_DESC_TYPE_HEADER; + + if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) { + /* post_cqe = 0, to avoid HW posting a CQE for every TSO + * segment transmitted on 88xx. + */ + hdr->subdesc_cnt = subdesc_cnt - POST_CQE_DESC_COUNT; + } else { + sq->skbuff[qentry] = (u64)skb; + /* Enable notification via CQE after processing SQE */ + hdr->post_cqe = 1; + /* No of subdescriptors following this */ + hdr->subdesc_cnt = subdesc_cnt; + } + hdr->tot_len = len; + + /* Offload checksum calculation to HW */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (ip.v4->version == 4) + hdr->csum_l3 = 1; /* Enable IP csum calculation */ + hdr->l3_offset = skb_network_offset(skb); + hdr->l4_offset = skb_transport_offset(skb); + + proto = (ip.v4->version == 4) ? ip.v4->protocol : + ip.v6->nexthdr; + + switch (proto) { + case IPPROTO_TCP: + hdr->csum_l4 = SEND_L4_CSUM_TCP; + break; + case IPPROTO_UDP: + hdr->csum_l4 = SEND_L4_CSUM_UDP; + break; + case IPPROTO_SCTP: + hdr->csum_l4 = SEND_L4_CSUM_SCTP; + break; + } + } + + if (nic->hw_tso && skb_shinfo(skb)->gso_size) { + hdr->tso = 1; + hdr->tso_start = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr->tso_max_paysize = skb_shinfo(skb)->gso_size; + /* For non-tunneled pkts, point this to L2 ethertype */ + hdr->inner_l3_offset = skb_network_offset(skb) - 2; + this_cpu_inc(nic->pnicvf->drv_stats->tx_tso); + } + + /* Check if timestamp is requested */ + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + skb_tx_timestamp(skb); + return; + } + + /* Tx timestamping not supported along with TSO, so ignore request */ + if (skb_shinfo(skb)->gso_size) + return; + + /* HW supports only a single outstanding packet to timestamp */ + if (!atomic_add_unless(&nic->pnicvf->tx_ptp_skbs, 1, 1)) + return; + + /* Mark the SKB for later reference */ + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + + /* Finally enable timestamp generation + * Since 'post_cqe' is also set, two CQEs will be posted + * for this packet i.e CQE_TYPE_SEND and CQE_TYPE_SEND_PTP. + */ + hdr->tstmp = 1; +} + +/* SQ GATHER subdescriptor + * Must follow HDR descriptor + */ +static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry, + int size, u64 data) +{ + struct sq_gather_subdesc *gather; + + qentry &= (sq->dmem.q_len - 1); + gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry); + + memset(gather, 0, SND_QUEUE_DESC_SIZE); + gather->subdesc_type = SQ_DESC_TYPE_GATHER; + gather->ld_type = NIC_SEND_LD_TYPE_E_LDD; + gather->size = size; + gather->addr = data; +} + +/* Add HDR + IMMEDIATE subdescriptors right after descriptors of a TSO + * packet so that a CQE is posted as a notifation for transmission of + * TSO packet. + */ +static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry, + int tso_sqe, struct sk_buff *skb) +{ + struct sq_imm_subdesc *imm; + struct sq_hdr_subdesc *hdr; + + sq->skbuff[qentry] = (u64)skb; + + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); + memset(hdr, 0, SND_QUEUE_DESC_SIZE); + hdr->subdesc_type = SQ_DESC_TYPE_HEADER; + /* Enable notification via CQE after processing SQE */ + hdr->post_cqe = 1; + /* There is no packet to transmit here */ + hdr->dont_send = 1; + hdr->subdesc_cnt = POST_CQE_DESC_COUNT - 1; + hdr->tot_len = 1; + /* Actual TSO header SQE index, needed for cleanup */ + hdr->rsvd2 = tso_sqe; + + qentry = nicvf_get_nxt_sqentry(sq, qentry); + imm = (struct sq_imm_subdesc *)GET_SQ_DESC(sq, qentry); + memset(imm, 0, SND_QUEUE_DESC_SIZE); + imm->subdesc_type = SQ_DESC_TYPE_IMMEDIATE; + imm->len = 1; +} + +static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb, + int sq_num, int desc_cnt) +{ + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(nic->pnicvf->netdev, + skb_get_queue_mapping(skb)); + + netdev_tx_sent_queue(txq, skb->len); + + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + + /* Inform HW to xmit all TSO segments */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + sq_num, desc_cnt); +} + +/* Segment a TSO packet into 'gso_size' segments and append + * them to SQ for transfer + */ +static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, + int sq_num, int qentry, struct sk_buff *skb) +{ + struct tso_t tso; + int seg_subdescs = 0, desc_cnt = 0; + int seg_len, total_len, data_left; + int hdr_qentry = qentry; + int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + + tso_start(skb, &tso); + total_len = skb->len - hdr_len; + while (total_len > 0) { + char *hdr; + + /* Save Qentry for adding HDR_SUBDESC at the end */ + hdr_qentry = qentry; + + data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); + total_len -= data_left; + + /* Add segment's header */ + qentry = nicvf_get_nxt_sqentry(sq, qentry); + hdr = sq->tso_hdrs + qentry * TSO_HEADER_SIZE; + tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0); + nicvf_sq_add_gather_subdesc(sq, qentry, hdr_len, + sq->tso_hdrs_phys + + qentry * TSO_HEADER_SIZE); + /* HDR_SUDESC + GATHER */ + seg_subdescs = 2; + seg_len = hdr_len; + + /* Add segment's payload fragments */ + while (data_left > 0) { + int size; + + size = min_t(int, tso.size, data_left); + + qentry = nicvf_get_nxt_sqentry(sq, qentry); + nicvf_sq_add_gather_subdesc(sq, qentry, size, + virt_to_phys(tso.data)); + seg_subdescs++; + seg_len += size; + + data_left -= size; + tso_build_data(skb, &tso, size); + } + nicvf_sq_add_hdr_subdesc(nic, sq, hdr_qentry, + seg_subdescs - 1, skb, seg_len); + sq->skbuff[hdr_qentry] = (u64)NULL; + qentry = nicvf_get_nxt_sqentry(sq, qentry); + + desc_cnt += seg_subdescs; + } + /* Save SKB in the last segment for freeing */ + sq->skbuff[hdr_qentry] = (u64)skb; + + nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt); + + this_cpu_inc(nic->pnicvf->drv_stats->tx_tso); + return 1; +} + +/* Append an skb to a SQ for packet transfer. */ +int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, + struct sk_buff *skb, u8 sq_num) +{ + int i, size; + int subdesc_cnt, hdr_sqe = 0; + int qentry; + u64 dma_addr; + + subdesc_cnt = nicvf_sq_subdesc_required(nic, skb); + if (subdesc_cnt > atomic_read(&sq->free_cnt)) + goto append_fail; + + qentry = nicvf_get_sq_desc(sq, subdesc_cnt); + + /* Check if its a TSO packet */ + if (skb_shinfo(skb)->gso_size && !nic->hw_tso) + return nicvf_sq_append_tso(nic, sq, sq_num, qentry, skb); + + /* Add SQ header subdesc */ + nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1, + skb, skb->len); + hdr_sqe = qentry; + + /* Add SQ gather subdescs */ + qentry = nicvf_get_nxt_sqentry(sq, qentry); + size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len; + /* HW will ensure data coherency, CPU sync not required */ + dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data), + offset_in_page(skb->data), size, + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { + nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt); + return 0; + } + + nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr); + + /* Check for scattered buffer */ + if (!skb_is_nonlinear(skb)) + goto doorbell; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const struct skb_frag_struct *frag; + + frag = &skb_shinfo(skb)->frags[i]; + + qentry = nicvf_get_nxt_sqentry(sq, qentry); + size = skb_frag_size(frag); + dma_addr = dma_map_page_attrs(&nic->pdev->dev, + skb_frag_page(frag), + frag->page_offset, size, + DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { + /* Free entire chain of mapped buffers + * here 'i' = frags mapped + above mapped skb->data + */ + nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i); + nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt); + return 0; + } + nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr); + } + +doorbell: + if (nic->t88 && skb_shinfo(skb)->gso_size) { + qentry = nicvf_get_nxt_sqentry(sq, qentry); + nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb); + } + + nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt); + + return 1; + +append_fail: + /* Use original PCI dev for debug log */ + nic = nic->pnicvf; + netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n"); + return 0; +} + +static inline unsigned frag_num(unsigned i) +{ +#ifdef __BIG_ENDIAN + return (i & ~3) + 3 - (i & 3); +#else + return i; +#endif +} + +static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr, + u64 buf_addr, bool xdp) +{ + struct page *page = NULL; + int len = RCV_FRAG_LEN; + + if (xdp) { + page = virt_to_page(phys_to_virt(buf_addr)); + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. + */ + if (page_ref_count(page) != 1) + return; + + len += XDP_PACKET_HEADROOM; + /* Receive buffers in XDP mode are mapped from page start */ + dma_addr &= PAGE_MASK; + } + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, len, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); +} + +/* Returns SKB for a received packet */ +struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, + struct cqe_rx_t *cqe_rx, bool xdp) +{ + int frag; + int payload_len = 0; + struct sk_buff *skb = NULL; + struct page *page; + int offset; + u16 *rb_lens = NULL; + u64 *rb_ptrs = NULL; + u64 phys_addr; + + rb_lens = (void *)cqe_rx + (3 * sizeof(u64)); + /* Except 88xx pass1 on all other chips CQE_RX2_S is added to + * CQE_RX at word6, hence buffer pointers move by word + * + * Use existing 'hw_tso' flag which will be set for all chips + * except 88xx pass1 instead of a additional cache line + * access (or miss) by using pci dev's revision. + */ + if (!nic->hw_tso) + rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64)); + else + rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64)); + + for (frag = 0; frag < cqe_rx->rb_cnt; frag++) { + payload_len = rb_lens[frag_num(frag)]; + phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs); + if (!phys_addr) { + if (skb) + dev_kfree_skb_any(skb); + return NULL; + } + + if (!frag) { + /* First fragment */ + nicvf_unmap_rcv_buffer(nic, + *rb_ptrs - cqe_rx->align_pad, + phys_addr, xdp); + skb = nicvf_rb_ptr_to_skb(nic, + phys_addr - cqe_rx->align_pad, + payload_len); + if (!skb) + return NULL; + skb_reserve(skb, cqe_rx->align_pad); + skb_put(skb, payload_len); + } else { + /* Add fragments */ + nicvf_unmap_rcv_buffer(nic, *rb_ptrs, phys_addr, xdp); + page = virt_to_page(phys_to_virt(phys_addr)); + offset = phys_to_virt(phys_addr) - page_address(page); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + offset, payload_len, RCV_FRAG_LEN); + } + /* Next buffer pointer */ + rb_ptrs++; + } + return skb; +} + +static u64 nicvf_int_type_to_mask(int int_type, int q_idx) +{ + u64 reg_val; + + switch (int_type) { + case NICVF_INTR_CQ: + reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); + break; + case NICVF_INTR_SQ: + reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); + break; + case NICVF_INTR_RBDR: + reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); + break; + case NICVF_INTR_PKT_DROP: + reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT); + break; + case NICVF_INTR_TCP_TIMER: + reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); + break; + case NICVF_INTR_MBOX: + reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT); + break; + case NICVF_INTR_QS_ERR: + reg_val = (1ULL << NICVF_INTR_QS_ERR_SHIFT); + break; + default: + reg_val = 0; + } + + return reg_val; +} + +/* Enable interrupt */ +void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx) +{ + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); + + if (!mask) { + netdev_dbg(nic->netdev, + "Failed to enable interrupt: unknown type\n"); + return; + } + nicvf_reg_write(nic, NIC_VF_ENA_W1S, + nicvf_reg_read(nic, NIC_VF_ENA_W1S) | mask); +} + +/* Disable interrupt */ +void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx) +{ + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); + + if (!mask) { + netdev_dbg(nic->netdev, + "Failed to disable interrupt: unknown type\n"); + return; + } + + nicvf_reg_write(nic, NIC_VF_ENA_W1C, mask); +} + +/* Clear interrupt */ +void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx) +{ + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); + + if (!mask) { + netdev_dbg(nic->netdev, + "Failed to clear interrupt: unknown type\n"); + return; + } + + nicvf_reg_write(nic, NIC_VF_INT, mask); +} + +/* Check if interrupt is enabled */ +int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx) +{ + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); + /* If interrupt type is unknown, we treat it disabled. */ + if (!mask) { + netdev_dbg(nic->netdev, + "Failed to check interrupt enable: unknown type\n"); + return 0; + } + + return mask & nicvf_reg_read(nic, NIC_VF_ENA_W1S); +} + +void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx) +{ + struct rcv_queue *rq; + +#define GET_RQ_STATS(reg) \ + nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\ + (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3)) + + rq = &nic->qs->rq[rq_idx]; + rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS); + rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS); +} + +void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx) +{ + struct snd_queue *sq; + +#define GET_SQ_STATS(reg) \ + nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\ + (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3)) + + sq = &nic->qs->sq[sq_idx]; + sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS); + sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS); +} + +/* Check for errors in the receive cmp.queue entry */ +int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) +{ + netif_err(nic, rx_err, nic->netdev, + "RX error CQE err_level 0x%x err_opcode 0x%x\n", + cqe_rx->err_level, cqe_rx->err_opcode); + + switch (cqe_rx->err_opcode) { + case CQ_RX_ERROP_RE_PARTIAL: + this_cpu_inc(nic->drv_stats->rx_bgx_truncated_pkts); + break; + case CQ_RX_ERROP_RE_JABBER: + this_cpu_inc(nic->drv_stats->rx_jabber_errs); + break; + case CQ_RX_ERROP_RE_FCS: + this_cpu_inc(nic->drv_stats->rx_fcs_errs); + break; + case CQ_RX_ERROP_RE_RX_CTL: + this_cpu_inc(nic->drv_stats->rx_bgx_errs); + break; + case CQ_RX_ERROP_PREL2_ERR: + this_cpu_inc(nic->drv_stats->rx_prel2_errs); + break; + case CQ_RX_ERROP_L2_MAL: + this_cpu_inc(nic->drv_stats->rx_l2_hdr_malformed); + break; + case CQ_RX_ERROP_L2_OVERSIZE: + this_cpu_inc(nic->drv_stats->rx_oversize); + break; + case CQ_RX_ERROP_L2_UNDERSIZE: + this_cpu_inc(nic->drv_stats->rx_undersize); + break; + case CQ_RX_ERROP_L2_LENMISM: + this_cpu_inc(nic->drv_stats->rx_l2_len_mismatch); + break; + case CQ_RX_ERROP_L2_PCLP: + this_cpu_inc(nic->drv_stats->rx_l2_pclp); + break; + case CQ_RX_ERROP_IP_NOT: + this_cpu_inc(nic->drv_stats->rx_ip_ver_errs); + break; + case CQ_RX_ERROP_IP_CSUM_ERR: + this_cpu_inc(nic->drv_stats->rx_ip_csum_errs); + break; + case CQ_RX_ERROP_IP_MAL: + this_cpu_inc(nic->drv_stats->rx_ip_hdr_malformed); + break; + case CQ_RX_ERROP_IP_MALD: + this_cpu_inc(nic->drv_stats->rx_ip_payload_malformed); + break; + case CQ_RX_ERROP_IP_HOP: + this_cpu_inc(nic->drv_stats->rx_ip_ttl_errs); + break; + case CQ_RX_ERROP_L3_PCLP: + this_cpu_inc(nic->drv_stats->rx_l3_pclp); + break; + case CQ_RX_ERROP_L4_MAL: + this_cpu_inc(nic->drv_stats->rx_l4_malformed); + break; + case CQ_RX_ERROP_L4_CHK: + this_cpu_inc(nic->drv_stats->rx_l4_csum_errs); + break; + case CQ_RX_ERROP_UDP_LEN: + this_cpu_inc(nic->drv_stats->rx_udp_len_errs); + break; + case CQ_RX_ERROP_L4_PORT: + this_cpu_inc(nic->drv_stats->rx_l4_port_errs); + break; + case CQ_RX_ERROP_TCP_FLAG: + this_cpu_inc(nic->drv_stats->rx_tcp_flag_errs); + break; + case CQ_RX_ERROP_TCP_OFFSET: + this_cpu_inc(nic->drv_stats->rx_tcp_offset_errs); + break; + case CQ_RX_ERROP_L4_PCLP: + this_cpu_inc(nic->drv_stats->rx_l4_pclp); + break; + case CQ_RX_ERROP_RBDR_TRUNC: + this_cpu_inc(nic->drv_stats->rx_truncated_pkts); + break; + } + + return 1; +} + +/* Check for errors in the send cmp.queue entry */ +int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx) +{ + switch (cqe_tx->send_status) { + case CQ_TX_ERROP_DESC_FAULT: + this_cpu_inc(nic->drv_stats->tx_desc_fault); + break; + case CQ_TX_ERROP_HDR_CONS_ERR: + this_cpu_inc(nic->drv_stats->tx_hdr_cons_err); + break; + case CQ_TX_ERROP_SUBDC_ERR: + this_cpu_inc(nic->drv_stats->tx_subdesc_err); + break; + case CQ_TX_ERROP_MAX_SIZE_VIOL: + this_cpu_inc(nic->drv_stats->tx_max_size_exceeded); + break; + case CQ_TX_ERROP_IMM_SIZE_OFLOW: + this_cpu_inc(nic->drv_stats->tx_imm_size_oflow); + break; + case CQ_TX_ERROP_DATA_SEQUENCE_ERR: + this_cpu_inc(nic->drv_stats->tx_data_seq_err); + break; + case CQ_TX_ERROP_MEM_SEQUENCE_ERR: + this_cpu_inc(nic->drv_stats->tx_mem_seq_err); + break; + case CQ_TX_ERROP_LOCK_VIOL: + this_cpu_inc(nic->drv_stats->tx_lock_viol); + break; + case CQ_TX_ERROP_DATA_FAULT: + this_cpu_inc(nic->drv_stats->tx_data_fault); + break; + case CQ_TX_ERROP_TSTMP_CONFLICT: + this_cpu_inc(nic->drv_stats->tx_tstmp_conflict); + break; + case CQ_TX_ERROP_TSTMP_TIMEOUT: + this_cpu_inc(nic->drv_stats->tx_tstmp_timeout); + break; + case CQ_TX_ERROP_MEM_FAULT: + this_cpu_inc(nic->drv_stats->tx_mem_fault); + break; + case CQ_TX_ERROP_CK_OVERLAP: + this_cpu_inc(nic->drv_stats->tx_csum_overlap); + break; + case CQ_TX_ERROP_CK_OFLOW: + this_cpu_inc(nic->drv_stats->tx_csum_overflow); + break; + } + + return 1; +} |