summaryrefslogtreecommitdiffstats
path: root/drivers/vfio/pci
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio/pci')
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c6
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c7
-rw-r--r--drivers/vfio/pci/vfio_pci_igd.c2
-rw-r--r--drivers/vfio/pci/vfio_pci_intrs.c206
-rw-r--r--drivers/vfio/pci/vfio_pci_rdwr.c2
5 files changed, 133 insertions, 90 deletions
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 4a350421c..523e0144c 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1244,7 +1244,7 @@ static int vfio_msi_cap_len(struct vfio_pci_core_device *vdev, u8 pos)
if (vdev->msi_perm)
return len;
- vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL);
+ vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL_ACCOUNT);
if (!vdev->msi_perm)
return -ENOMEM;
@@ -1731,11 +1731,11 @@ int vfio_config_init(struct vfio_pci_core_device *vdev)
* no requirements on the length of a capability, so the gap between
* capabilities needs byte granularity.
*/
- map = kmalloc(pdev->cfg_size, GFP_KERNEL);
+ map = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT);
if (!map)
return -ENOMEM;
- vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL);
+ vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT);
if (!vconfig) {
kfree(map);
return -ENOMEM;
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index e030c2120..f357fd157 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -141,7 +141,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
* of the exclusive page in case that hot-add
* device's bar is assigned into it.
*/
- dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
+ dummy_res =
+ kzalloc(sizeof(*dummy_res), GFP_KERNEL_ACCOUNT);
if (dummy_res == NULL)
goto no_mmap;
@@ -856,7 +857,7 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
region = krealloc(vdev->region,
(vdev->num_regions + 1) * sizeof(*region),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!region)
return -ENOMEM;
@@ -1637,7 +1638,7 @@ static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev,
{
struct vfio_pci_mmap_vma *mmap_vma;
- mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
+ mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT);
if (!mmap_vma)
return -ENOMEM;
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index 5e6ca5926..dd70e2431 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -180,7 +180,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev)
if (!addr || !(~addr))
return -ENODEV;
- opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL);
+ opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL_ACCOUNT);
if (!opregionvbt)
return -ENOMEM;
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 40c3d7cf1..03246a59b 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -55,17 +55,24 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
{
struct vfio_pci_core_device *vdev = opaque;
- if (likely(is_intx(vdev) && !vdev->virq_disabled))
- eventfd_signal(vdev->ctx[0].trigger, 1);
+ if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
+ struct eventfd_ctx *trigger;
+
+ trigger = READ_ONCE(vdev->ctx[0].trigger);
+ if (likely(trigger))
+ eventfd_signal(trigger, 1);
+ }
}
/* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
-bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
+static bool __vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
unsigned long flags;
bool masked_changed = false;
+ lockdep_assert_held(&vdev->igate);
+
spin_lock_irqsave(&vdev->irqlock, flags);
/*
@@ -95,6 +102,17 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
return masked_changed;
}
+bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
+{
+ bool mask_changed;
+
+ mutex_lock(&vdev->igate);
+ mask_changed = __vfio_pci_intx_mask(vdev);
+ mutex_unlock(&vdev->igate);
+
+ return mask_changed;
+}
+
/*
* If this is triggered by an eventfd, we can't call eventfd_signal
* or else we'll deadlock on the eventfd wait queue. Return >0 when
@@ -137,12 +155,21 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
return ret;
}
-void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
+static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
{
+ lockdep_assert_held(&vdev->igate);
+
if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
vfio_send_intx_eventfd(vdev, NULL);
}
+void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
+{
+ mutex_lock(&vdev->igate);
+ __vfio_pci_intx_unmask(vdev);
+ mutex_unlock(&vdev->igate);
+}
+
static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
{
struct vfio_pci_core_device *vdev = dev_id;
@@ -169,95 +196,104 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
return ret;
}
-static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
+static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
+ struct eventfd_ctx *trigger)
{
+ struct pci_dev *pdev = vdev->pdev;
+ unsigned long irqflags;
+ char *name;
+ int ret;
+
if (!is_irq_none(vdev))
return -EINVAL;
- if (!vdev->pdev->irq)
+ if (!pdev->irq)
return -ENODEV;
- vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
+ name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
+ if (!name)
+ return -ENOMEM;
+
+ vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT);
if (!vdev->ctx)
return -ENOMEM;
vdev->num_ctx = 1;
+ vdev->ctx[0].name = name;
+ vdev->ctx[0].trigger = trigger;
+
/*
- * If the virtual interrupt is masked, restore it. Devices
- * supporting DisINTx can be masked at the hardware level
- * here, non-PCI-2.3 devices will have to wait until the
- * interrupt is enabled.
+ * Fill the initial masked state based on virq_disabled. After
+ * enable, changing the DisINTx bit in vconfig directly changes INTx
+ * masking. igate prevents races during setup, once running masked
+ * is protected via irqlock.
+ *
+ * Devices supporting DisINTx also reflect the current mask state in
+ * the physical DisINTx bit, which is not affected during IRQ setup.
+ *
+ * Devices without DisINTx support require an exclusive interrupt.
+ * IRQ masking is performed at the IRQ chip. Again, igate protects
+ * against races during setup and IRQ handlers and irqfds are not
+ * yet active, therefore masked is stable and can be used to
+ * conditionally auto-enable the IRQ.
+ *
+ * irq_type must be stable while the IRQ handler is registered,
+ * therefore it must be set before request_irq().
*/
vdev->ctx[0].masked = vdev->virq_disabled;
- if (vdev->pci_2_3)
- pci_intx(vdev->pdev, !vdev->ctx[0].masked);
+ if (vdev->pci_2_3) {
+ pci_intx(pdev, !vdev->ctx[0].masked);
+ irqflags = IRQF_SHARED;
+ } else {
+ irqflags = vdev->ctx[0].masked ? IRQF_NO_AUTOEN : 0;
+ }
vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
+ ret = request_irq(pdev->irq, vfio_intx_handler,
+ irqflags, vdev->ctx[0].name, vdev);
+ if (ret) {
+ vdev->irq_type = VFIO_PCI_NUM_IRQS;
+ kfree(name);
+ vdev->num_ctx = 0;
+ kfree(vdev->ctx);
+ return ret;
+ }
+
return 0;
}
-static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
+static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev,
+ struct eventfd_ctx *trigger)
{
struct pci_dev *pdev = vdev->pdev;
- unsigned long irqflags = IRQF_SHARED;
- struct eventfd_ctx *trigger;
- unsigned long flags;
- int ret;
+ struct eventfd_ctx *old;
- if (vdev->ctx[0].trigger) {
- free_irq(pdev->irq, vdev);
- kfree(vdev->ctx[0].name);
- eventfd_ctx_put(vdev->ctx[0].trigger);
- vdev->ctx[0].trigger = NULL;
- }
-
- if (fd < 0) /* Disable only */
- return 0;
-
- vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)",
- pci_name(pdev));
- if (!vdev->ctx[0].name)
- return -ENOMEM;
+ old = vdev->ctx[0].trigger;
- trigger = eventfd_ctx_fdget(fd);
- if (IS_ERR(trigger)) {
- kfree(vdev->ctx[0].name);
- return PTR_ERR(trigger);
- }
+ WRITE_ONCE(vdev->ctx[0].trigger, trigger);
- vdev->ctx[0].trigger = trigger;
-
- if (!vdev->pci_2_3)
- irqflags = 0;
-
- ret = request_irq(pdev->irq, vfio_intx_handler,
- irqflags, vdev->ctx[0].name, vdev);
- if (ret) {
- vdev->ctx[0].trigger = NULL;
- kfree(vdev->ctx[0].name);
- eventfd_ctx_put(trigger);
- return ret;
+ /* Releasing an old ctx requires synchronizing in-flight users */
+ if (old) {
+ synchronize_irq(pdev->irq);
+ vfio_virqfd_flush_thread(&vdev->ctx[0].unmask);
+ eventfd_ctx_put(old);
}
- /*
- * INTx disable will stick across the new irq setup,
- * disable_irq won't.
- */
- spin_lock_irqsave(&vdev->irqlock, flags);
- if (!vdev->pci_2_3 && vdev->ctx[0].masked)
- disable_irq_nosync(pdev->irq);
- spin_unlock_irqrestore(&vdev->irqlock, flags);
-
return 0;
}
static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
{
+ struct pci_dev *pdev = vdev->pdev;
+
vfio_virqfd_disable(&vdev->ctx[0].unmask);
vfio_virqfd_disable(&vdev->ctx[0].mask);
- vfio_intx_set_signal(vdev, -1);
+ free_irq(pdev->irq, vdev);
+ if (vdev->ctx[0].trigger)
+ eventfd_ctx_put(vdev->ctx[0].trigger);
+ kfree(vdev->ctx[0].name);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
vdev->num_ctx = 0;
kfree(vdev->ctx);
@@ -284,7 +320,8 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
if (!is_irq_none(vdev))
return -EINVAL;
- vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
+ vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx),
+ GFP_KERNEL_ACCOUNT);
if (!vdev->ctx)
return -ENOMEM;
@@ -316,14 +353,14 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
}
static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
- int vector, int fd, bool msix)
+ unsigned int vector, int fd, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
struct eventfd_ctx *trigger;
int irq, ret;
u16 cmd;
- if (vector < 0 || vector >= vdev->num_ctx)
+ if (vector >= vdev->num_ctx)
return -EINVAL;
irq = pci_irq_vector(pdev, vector);
@@ -343,7 +380,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
if (fd < 0)
return 0;
- vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
+ vdev->ctx[vector].name = kasprintf(GFP_KERNEL_ACCOUNT,
+ "vfio-msi%s[%d](%s)",
msix ? "x" : "", vector,
pci_name(pdev));
if (!vdev->ctx[vector].name)
@@ -397,7 +435,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
unsigned count, int32_t *fds, bool msix)
{
- int i, j, ret = 0;
+ unsigned int i, j;
+ int ret = 0;
if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
return -EINVAL;
@@ -408,8 +447,8 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
}
if (ret) {
- for (--j; j >= (int)start; j--)
- vfio_msi_set_vector_signal(vdev, j, -1, msix);
+ for (i = start; i < j; i++)
+ vfio_msi_set_vector_signal(vdev, i, -1, msix);
}
return ret;
@@ -418,16 +457,15 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
- int i;
+ unsigned int i;
u16 cmd;
for (i = 0; i < vdev->num_ctx; i++) {
vfio_virqfd_disable(&vdev->ctx[i].unmask);
vfio_virqfd_disable(&vdev->ctx[i].mask);
+ vfio_msi_set_vector_signal(vdev, i, -1, msix);
}
- vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
-
cmd = vfio_pci_memory_lock_and_enable(vdev);
pci_free_irq_vectors(pdev);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
@@ -455,11 +493,11 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_NONE) {
- vfio_pci_intx_unmask(vdev);
+ __vfio_pci_intx_unmask(vdev);
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
uint8_t unmask = *(uint8_t *)data;
if (unmask)
- vfio_pci_intx_unmask(vdev);
+ __vfio_pci_intx_unmask(vdev);
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
int32_t fd = *(int32_t *)data;
if (fd >= 0)
@@ -482,11 +520,11 @@ static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev,
return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_NONE) {
- vfio_pci_intx_mask(vdev);
+ __vfio_pci_intx_mask(vdev);
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
uint8_t mask = *(uint8_t *)data;
if (mask)
- vfio_pci_intx_mask(vdev);
+ __vfio_pci_intx_mask(vdev);
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
return -ENOTTY; /* XXX implement me */
}
@@ -507,19 +545,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ struct eventfd_ctx *trigger = NULL;
int32_t fd = *(int32_t *)data;
int ret;
- if (is_intx(vdev))
- return vfio_intx_set_signal(vdev, fd);
+ if (fd >= 0) {
+ trigger = eventfd_ctx_fdget(fd);
+ if (IS_ERR(trigger))
+ return PTR_ERR(trigger);
+ }
- ret = vfio_intx_enable(vdev);
- if (ret)
- return ret;
+ if (is_intx(vdev))
+ ret = vfio_intx_set_signal(vdev, trigger);
+ else
+ ret = vfio_intx_enable(vdev, trigger);
- ret = vfio_intx_set_signal(vdev, fd);
- if (ret)
- vfio_intx_disable(vdev);
+ if (ret && trigger)
+ eventfd_ctx_put(trigger);
return ret;
}
@@ -541,7 +583,7 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
unsigned index, unsigned start,
unsigned count, uint32_t flags, void *data)
{
- int i;
+ unsigned int i;
bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index e352a033b..e27de61ac 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -470,7 +470,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
goto out_unlock;
}
- ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL);
+ ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
if (!ioeventfd) {
ret = -ENOMEM;
goto out_unlock;