diff options
Diffstat (limited to '')
-rw-r--r-- | drivers/dma/idxd/submit.c | 217 |
1 files changed, 217 insertions, 0 deletions
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c new file mode 100644 index 000000000..c01db23e3 --- /dev/null +++ b/drivers/dma/idxd/submit.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <uapi/linux/idxd.h> +#include "idxd.h" +#include "registers.h" + +static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) +{ + struct idxd_desc *desc; + struct idxd_device *idxd = wq->idxd; + + desc = wq->descs[idx]; + memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); + memset(desc->completion, 0, idxd->data->compl_size); + desc->cpu = cpu; + + if (device_pasid_enabled(idxd)) + desc->hw->pasid = idxd->pasid; + + return desc; +} + +struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype) +{ + int cpu, idx; + struct idxd_device *idxd = wq->idxd; + DEFINE_SBQ_WAIT(wait); + struct sbq_wait_state *ws; + struct sbitmap_queue *sbq; + + if (idxd->state != IDXD_DEV_ENABLED) + return ERR_PTR(-EIO); + + sbq = &wq->sbq; + idx = sbitmap_queue_get(sbq, &cpu); + if (idx < 0) { + if (optype == IDXD_OP_NONBLOCK) + return ERR_PTR(-EAGAIN); + } else { + return __get_desc(wq, idx, cpu); + } + + ws = &sbq->ws[0]; + for (;;) { + sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE); + if (signal_pending_state(TASK_INTERRUPTIBLE, current)) + break; + idx = sbitmap_queue_get(sbq, &cpu); + if (idx >= 0) + break; + schedule(); + } + + sbitmap_finish_wait(sbq, ws, &wait); + if (idx < 0) + return ERR_PTR(-EAGAIN); + + return __get_desc(wq, idx, cpu); +} + +void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) +{ + int cpu = desc->cpu; + + desc->cpu = -1; + sbitmap_queue_clear(&wq->sbq, desc->id, cpu); +} + +static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *n; + + lockdep_assert_held(&ie->list_lock); + list_for_each_entry_safe(d, n, &ie->work_list, list) { + if (d == desc) { + list_del(&d->list); + return d; + } + } + + /* + * At this point, the desc needs to be aborted is held by the completion + * handler where it has taken it off the pending list but has not added to the + * work list. It will be cleaned up by the interrupt handler when it sees the + * IDXD_COMP_DESC_ABORT for completion status. + */ + return NULL; +} + +static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *t, *found = NULL; + struct llist_node *head; + LIST_HEAD(flist); + + desc->completion->status = IDXD_COMP_DESC_ABORT; + /* + * Grab the list lock so it will block the irq thread handler. This allows the + * abort code to locate the descriptor need to be aborted. + */ + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) { + if (d == desc) { + found = desc; + continue; + } + + if (d->completion->status) + list_add_tail(&d->list, &flist); + else + list_add_tail(&d->list, &ie->work_list); + } + } + + if (!found) + found = list_abort_desc(wq, ie, desc); + spin_unlock(&ie->list_lock); + + if (found) + idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false); + + /* + * completing the descriptor will return desc to allocator and + * the desc can be acquired by a different process and the + * desc->list can be modified. Delete desc from list so the + * list trasversing does not get corrupted by the other process. + */ + list_for_each_entry_safe(d, t, &flist, list) { + list_del_init(&d->list); + idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true); + } +} + +/* + * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver + * has better control of number of descriptors being submitted to a shared wq by limiting + * the number of driver allocated descriptors to the wq size. However, when the swq is + * exported to a guest kernel, it may be shared with multiple guest kernels. This means + * the likelihood of getting busy returned on the swq when submitting goes significantly up. + * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving + * up. The sysfs knob can be tuned by the system administrator. + */ +int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) +{ + unsigned int retries = wq->enqcmds_retries; + int rc; + + do { + rc = enqcmds(portal, desc); + if (rc == 0) + break; + cpu_relax(); + } while (retries--); + + return rc; +} + +int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_irq_entry *ie = NULL; + u32 desc_flags = desc->hw->flags; + void __iomem *portal; + int rc; + + if (idxd->state != IDXD_DEV_ENABLED) + return -EIO; + + if (!percpu_ref_tryget_live(&wq->wq_active)) { + wait_for_completion(&wq->wq_resurrect); + if (!percpu_ref_tryget_live(&wq->wq_active)) + return -ENXIO; + } + + portal = idxd_wq_portal_addr(wq); + + /* + * The wmb() flushes writes to coherent DMA data before + * possibly triggering a DMA read. The wmb() is necessary + * even on UP because the recipient is a device. + */ + wmb(); + + /* + * Pending the descriptor to the lockless list for the irq_entry + * that we designated the descriptor to. + */ + if (desc_flags & IDXD_OP_FLAG_RCI) { + ie = &wq->ie; + desc->hw->int_handle = ie->int_handle; + llist_add(&desc->llnode, &ie->pending_llist); + } + + if (wq_dedicated(wq)) { + iosubmit_cmds512(portal, desc->hw, 1); + } else { + rc = idxd_enqcmds(wq, portal, desc->hw); + if (rc < 0) { + percpu_ref_put(&wq->wq_active); + /* abort operation frees the descriptor */ + if (ie) + llist_abort_desc(wq, ie, desc); + return rc; + } + } + + percpu_ref_put(&wq->wq_active); + return 0; +} |