summaryrefslogtreecommitdiffstats
path: root/drivers/dma/idxd/submit.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
commitace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
treeb2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/dma/idxd/submit.c
parentInitial commit. (diff)
downloadlinux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/dma/idxd/submit.c')
-rw-r--r--drivers/dma/idxd/submit.c217
1 files changed, 217 insertions, 0 deletions
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
new file mode 100644
index 000000000..3f922518e
--- /dev/null
+++ b/drivers/dma/idxd/submit.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <uapi/linux/idxd.h>
+#include "idxd.h"
+#include "registers.h"
+
+static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
+{
+ struct idxd_desc *desc;
+ struct idxd_device *idxd = wq->idxd;
+
+ desc = wq->descs[idx];
+ memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
+ memset(desc->completion, 0, idxd->data->compl_size);
+ desc->cpu = cpu;
+
+ if (device_pasid_enabled(idxd))
+ desc->hw->pasid = idxd->pasid;
+
+ return desc;
+}
+
+struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
+{
+ int cpu, idx;
+ struct idxd_device *idxd = wq->idxd;
+ DEFINE_SBQ_WAIT(wait);
+ struct sbq_wait_state *ws;
+ struct sbitmap_queue *sbq;
+
+ if (idxd->state != IDXD_DEV_ENABLED)
+ return ERR_PTR(-EIO);
+
+ sbq = &wq->sbq;
+ idx = sbitmap_queue_get(sbq, &cpu);
+ if (idx < 0) {
+ if (optype == IDXD_OP_NONBLOCK)
+ return ERR_PTR(-EAGAIN);
+ } else {
+ return __get_desc(wq, idx, cpu);
+ }
+
+ ws = &sbq->ws[0];
+ for (;;) {
+ sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
+ if (signal_pending_state(TASK_INTERRUPTIBLE, current))
+ break;
+ idx = sbitmap_queue_get(sbq, &cpu);
+ if (idx >= 0)
+ break;
+ schedule();
+ }
+
+ sbitmap_finish_wait(sbq, ws, &wait);
+ if (idx < 0)
+ return ERR_PTR(-EAGAIN);
+
+ return __get_desc(wq, idx, cpu);
+}
+
+void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
+{
+ int cpu = desc->cpu;
+
+ desc->cpu = -1;
+ sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
+}
+
+static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
+ struct idxd_desc *desc)
+{
+ struct idxd_desc *d, *n;
+
+ lockdep_assert_held(&ie->list_lock);
+ list_for_each_entry_safe(d, n, &ie->work_list, list) {
+ if (d == desc) {
+ list_del(&d->list);
+ return d;
+ }
+ }
+
+ /*
+ * At this point, the desc needs to be aborted is held by the completion
+ * handler where it has taken it off the pending list but has not added to the
+ * work list. It will be cleaned up by the interrupt handler when it sees the
+ * IDXD_COMP_DESC_ABORT for completion status.
+ */
+ return NULL;
+}
+
+static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
+ struct idxd_desc *desc)
+{
+ struct idxd_desc *d, *t, *found = NULL;
+ struct llist_node *head;
+ LIST_HEAD(flist);
+
+ desc->completion->status = IDXD_COMP_DESC_ABORT;
+ /*
+ * Grab the list lock so it will block the irq thread handler. This allows the
+ * abort code to locate the descriptor need to be aborted.
+ */
+ spin_lock(&ie->list_lock);
+ head = llist_del_all(&ie->pending_llist);
+ if (head) {
+ llist_for_each_entry_safe(d, t, head, llnode) {
+ if (d == desc) {
+ found = desc;
+ continue;
+ }
+
+ if (d->completion->status)
+ list_add_tail(&d->list, &flist);
+ else
+ list_add_tail(&d->list, &ie->work_list);
+ }
+ }
+
+ if (!found)
+ found = list_abort_desc(wq, ie, desc);
+ spin_unlock(&ie->list_lock);
+
+ if (found)
+ idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
+
+ /*
+ * completing the descriptor will return desc to allocator and
+ * the desc can be acquired by a different process and the
+ * desc->list can be modified. Delete desc from list so the
+ * list trasversing does not get corrupted by the other process.
+ */
+ list_for_each_entry_safe(d, t, &flist, list) {
+ list_del_init(&d->list);
+ idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true);
+ }
+}
+
+/*
+ * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
+ * has better control of number of descriptors being submitted to a shared wq by limiting
+ * the number of driver allocated descriptors to the wq size. However, when the swq is
+ * exported to a guest kernel, it may be shared with multiple guest kernels. This means
+ * the likelihood of getting busy returned on the swq when submitting goes significantly up.
+ * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
+ * up. The sysfs knob can be tuned by the system administrator.
+ */
+int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
+{
+ unsigned int retries = wq->enqcmds_retries;
+ int rc;
+
+ do {
+ rc = enqcmds(portal, desc);
+ if (rc == 0)
+ break;
+ cpu_relax();
+ } while (retries--);
+
+ return rc;
+}
+
+int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_irq_entry *ie = NULL;
+ u32 desc_flags = desc->hw->flags;
+ void __iomem *portal;
+ int rc;
+
+ if (idxd->state != IDXD_DEV_ENABLED)
+ return -EIO;
+
+ if (!percpu_ref_tryget_live(&wq->wq_active)) {
+ wait_for_completion(&wq->wq_resurrect);
+ if (!percpu_ref_tryget_live(&wq->wq_active))
+ return -ENXIO;
+ }
+
+ portal = idxd_wq_portal_addr(wq);
+
+ /*
+ * Pending the descriptor to the lockless list for the irq_entry
+ * that we designated the descriptor to.
+ */
+ if (desc_flags & IDXD_OP_FLAG_RCI) {
+ ie = &wq->ie;
+ desc->hw->int_handle = ie->int_handle;
+ llist_add(&desc->llnode, &ie->pending_llist);
+ }
+
+ /*
+ * The wmb() flushes writes to coherent DMA data before
+ * possibly triggering a DMA read. The wmb() is necessary
+ * even on UP because the recipient is a device.
+ */
+ wmb();
+
+ if (wq_dedicated(wq)) {
+ iosubmit_cmds512(portal, desc->hw, 1);
+ } else {
+ rc = idxd_enqcmds(wq, portal, desc->hw);
+ if (rc < 0) {
+ percpu_ref_put(&wq->wq_active);
+ /* abort operation frees the descriptor */
+ if (ie)
+ llist_abort_desc(wq, ie, desc);
+ return rc;
+ }
+ }
+
+ percpu_ref_put(&wq->wq_active);
+ return 0;
+}