summaryrefslogtreecommitdiffstats
path: root/drivers/xen/xen-pciback
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/xen/xen-pciback')
-rw-r--r--drivers/xen/xen-pciback/Makefile8
-rw-r--r--drivers/xen/xen-pciback/conf_space.c431
-rw-r--r--drivers/xen/xen-pciback/conf_space.h129
-rw-r--r--drivers/xen/xen-pciback/conf_space_capability.c207
-rw-r--r--drivers/xen/xen-pciback/conf_space_header.c424
-rw-r--r--drivers/xen/xen-pciback/conf_space_quirks.c140
-rw-r--r--drivers/xen/xen-pciback/conf_space_quirks.h34
-rw-r--r--drivers/xen/xen-pciback/passthrough.c197
-rw-r--r--drivers/xen/xen-pciback/pci_stub.c1652
-rw-r--r--drivers/xen/xen-pciback/pciback.h202
-rw-r--r--drivers/xen/xen-pciback/pciback_ops.c464
-rw-r--r--drivers/xen/xen-pciback/vpci.c273
-rw-r--r--drivers/xen/xen-pciback/xenbus.c755
13 files changed, 4916 insertions, 0 deletions
diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile
new file mode 100644
index 000000000..e8d981d43
--- /dev/null
+++ b/drivers/xen/xen-pciback/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+# Build the xen-pciback object when CONFIG_XEN_PCIDEV_BACKEND is set.
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
+
+# Objects that are linked together into xen-pciback.
+xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
+xen-pciback-y += conf_space.o conf_space_header.o \
+ conf_space_capability.o \
+ conf_space_quirks.o vpci.o \
+ passthrough.o
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
new file mode 100644
index 000000000..60111719b
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Backend - Functions for creating a virtual configuration space for
+ * exported PCI Devices.
+ * It's dangerous to allow PCI Driver Domains to change their
+ * device's resources (memory, i/o ports, interrupts). We need to
+ * restrict changes to certain PCI Configuration registers:
+ * BARs, INTERRUPT_PIN, most registers in the header...
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+bool xen_pcibk_permissive;
+module_param_named(permissive, xen_pcibk_permissive, bool, 0644);
+
+/*
+ * DEFINE_PCI_CONFIG(op, size, type) generates xen_pcibk_<op>_config_<size>(),
+ * a thin wrapper that forwards to pci_<op>_config_<size>() and ignores its
+ * trailing "data" argument.
+ *
+ * The instantiations below create xen_pcibk_read_config_{byte,word,dword}
+ * and xen_pcibk_write_config_{byte,word,dword}.
+ */
+#define DEFINE_PCI_CONFIG(op, size, type) \
+int xen_pcibk_##op##_config_##size \
+(struct pci_dev *dev, int offset, type value, void *data) \
+{ \
+ return pci_##op##_config_##size(dev, offset, value); \
+}
+
+DEFINE_PCI_CONFIG(read, byte, u8 *)
+DEFINE_PCI_CONFIG(read, word, u16 *)
+DEFINE_PCI_CONFIG(read, dword, u32 *)
+
+DEFINE_PCI_CONFIG(write, byte, u8)
+DEFINE_PCI_CONFIG(write, word, u16)
+DEFINE_PCI_CONFIG(write, dword, u32)
+
+/*
+ * Run the read handler of one virtual configuration field.
+ *
+ * *value is cleared first; then the handler that matches the field size
+ * (1, 2 or 4 bytes) is called, if one is installed. Returns the handler's
+ * result, or 0 when there is no handler or the size is not 1, 2 or 4.
+ */
+static int conf_space_read(struct pci_dev *dev,
+			   const struct config_field_entry *entry,
+			   int offset, u32 *value)
+{
+	const struct config_field *fld = entry->field;
+
+	*value = 0;
+
+	if (fld->size == 1 && fld->u.b.read)
+		return fld->u.b.read(dev, offset, (u8 *) value, entry->data);
+
+	if (fld->size == 2 && fld->u.w.read)
+		return fld->u.w.read(dev, offset, (u16 *) value, entry->data);
+
+	if (fld->size == 4 && fld->u.dw.read)
+		return fld->u.dw.read(dev, offset, value, entry->data);
+
+	return 0;
+}
+
+/*
+ * Run the write handler of one virtual configuration field.
+ *
+ * The handler that matches the field size (1, 2 or 4 bytes) is called with
+ * the value truncated to that width. Returns the handler's result, or 0
+ * when there is no handler or the size is not 1, 2 or 4.
+ */
+static int conf_space_write(struct pci_dev *dev,
+			    const struct config_field_entry *entry,
+			    int offset, u32 value)
+{
+	const struct config_field *fld = entry->field;
+
+	if (fld->size == 1 && fld->u.b.write)
+		return fld->u.b.write(dev, offset, (u8) value, entry->data);
+
+	if (fld->size == 2 && fld->u.w.write)
+		return fld->u.w.write(dev, offset, (u16) value, entry->data);
+
+	if (fld->size == 4 && fld->u.dw.write)
+		return fld->u.dw.write(dev, offset, value, entry->data);
+
+	return 0;
+}
+
+/* Bit mask covering an access of @size bytes; any size other than 1 or 2
+ * yields the full 32-bit mask. */
+static inline u32 get_mask(int size)
+{
+	switch (size) {
+	case 1:
+		return 0xff;
+	case 2:
+		return 0xffff;
+	default:
+		return 0xffffffff;
+	}
+}
+
+/* Return 1 when @size is 1, 2 or 4 and @offset is aligned to it, else 0. */
+static inline int valid_request(int offset, int size)
+{
+	switch (size) {
+	case 1:
+	case 2:
+	case 4:
+		/* no un-aligned requests */
+		return (offset % size) == 0;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Overlay @new_val onto @val.
+ *
+ * @new_val and @new_val_mask are shifted by @offset bytes (left when
+ * @offset is non-negative, right otherwise); the bits selected by the
+ * shifted mask are then taken from the shifted @new_val, all others from
+ * @val.
+ */
+static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
+			      int offset)
+{
+	if (offset < 0) {
+		new_val >>= (offset * -8);
+		new_val_mask >>= (offset * -8);
+	} else {
+		new_val <<= (offset * 8);
+		new_val_mask <<= (offset * 8);
+	}
+
+	return (val & ~new_val_mask) | (new_val & new_val_mask);
+}
+
+/*
+ * Translate a PCIBIOS_* status into the matching XEN_PCI_ERR_* code.
+ * Any value that is not one of the PCIBIOS_* codes listed here is
+ * returned unchanged.
+ */
+static int xen_pcibios_err_to_errno(int err)
+{
+	switch (err) {
+	case PCIBIOS_SUCCESSFUL:
+		return XEN_PCI_ERR_success;
+	case PCIBIOS_DEVICE_NOT_FOUND:
+		return XEN_PCI_ERR_dev_not_found;
+	case PCIBIOS_BAD_REGISTER_NUMBER:
+		return XEN_PCI_ERR_invalid_offset;
+	case PCIBIOS_FUNC_NOT_SUPPORTED:
+		return XEN_PCI_ERR_not_implemented;
+	case PCIBIOS_SET_FAILED:
+		return XEN_PCI_ERR_access_denied;
+	default:
+		return err;
+	}
+}
+
+/*
+ * Read @size bytes at @offset from the virtual configuration space of @dev.
+ *
+ * The real register content is read first. Every registered virtual field
+ * that overlaps [offset, offset + size) is then read through its handler
+ * and merged over the real value.
+ *
+ * The result is stored in *@ret_val; it is 0 for a request rejected by
+ * valid_request(). The return value is the last error code, passed
+ * through xen_pcibios_err_to_errno().
+ */
+int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
+ u32 *ret_val)
+{
+ int err = 0;
+ struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+ const struct config_field_entry *cfg_entry;
+ const struct config_field *field;
+ int field_start, field_end;
+ /* if read fails for any reason, return 0
+ * (as if device didn't respond) */
+ u32 value = 0, tmp_val;
+
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
+ pci_name(dev), size, offset);
+
+ if (!valid_request(offset, size)) {
+ err = XEN_PCI_ERR_invalid_offset;
+ goto out;
+ }
+
+ /* Get the real value first, then modify as appropriate */
+ switch (size) {
+ case 1:
+ err = pci_read_config_byte(dev, offset, (u8 *) &value);
+ break;
+ case 2:
+ err = pci_read_config_word(dev, offset, (u16 *) &value);
+ break;
+ case 4:
+ err = pci_read_config_dword(dev, offset, &value);
+ break;
+ }
+
+ /* Note: err from the raw read above is overwritten by the first
+ * overlapping field handler below. */
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+ field = cfg_entry->field;
+
+ field_start = OFFSET(cfg_entry);
+ field_end = OFFSET(cfg_entry) + field->size;
+
+ /* Does this field overlap the requested range? */
+ if (offset + size > field_start && field_end > offset) {
+ err = conf_space_read(dev, cfg_entry, field_start,
+ &tmp_val);
+ if (err)
+ goto out;
+
+ /* Shift the field into position relative to the
+ * request and overlay it on the value so far. */
+ value = merge_value(value, tmp_val,
+ get_mask(field->size),
+ field_start - offset);
+ }
+ }
+
+out:
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n",
+ pci_name(dev), size, offset, value);
+
+ *ret_val = value;
+ return xen_pcibios_err_to_errno(err);
+}
+
+/*
+ * Write @value (@size bytes) at @offset of the virtual configuration space
+ * of @dev.
+ *
+ * For every registered virtual field overlapping the request, the field's
+ * current value is read, the new bytes are merged in, and the result is
+ * passed to the field's write handler. If no field overlaps, the write is
+ * forwarded to the device only in permissive mode; otherwise it is dropped
+ * and a warning is logged once per device.
+ *
+ * Returns XEN_PCI_ERR_invalid_offset for a request rejected by
+ * valid_request(), otherwise the last error code passed through
+ * xen_pcibios_err_to_errno().
+ */
+int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
+{
+ int err = 0, handled = 0;
+ struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+ const struct config_field_entry *cfg_entry;
+ const struct config_field *field;
+ u32 tmp_val;
+ int field_start, field_end;
+
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG
+ DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n",
+ pci_name(dev), size, offset, value);
+
+ if (!valid_request(offset, size))
+ return XEN_PCI_ERR_invalid_offset;
+
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+ field = cfg_entry->field;
+
+ field_start = OFFSET(cfg_entry);
+ field_end = OFFSET(cfg_entry) + field->size;
+
+ /* Does this field overlap the requested range? */
+ if (offset + size > field_start && field_end > offset) {
+ /* Read-modify-write: fetch the whole field, overlay
+ * the bytes being written, then write it back. */
+ err = conf_space_read(dev, cfg_entry, field_start,
+ &tmp_val);
+ if (err)
+ break;
+
+ tmp_val = merge_value(tmp_val, value, get_mask(size),
+ offset - field_start);
+
+ err = conf_space_write(dev, cfg_entry, field_start,
+ tmp_val);
+
+ /* handled is set true here, but not every byte
+ * may have been written! Properly detecting if
+ * every byte is handled is unnecessary as the
+ * flag is used to detect devices that need
+ * special helpers to work correctly.
+ */
+ handled = 1;
+ }
+ }
+
+ if (!handled && !err) {
+ /* By default, anything not specifically handled above is
+ * read-only. The permissive flag changes this behavior so
+ * that anything not specifically handled above is writable.
+ * This means that some fields may still be read-only because
+ * they have entries in the config_field list that intercept
+ * the write and do nothing. */
+ if (dev_data->permissive || xen_pcibk_permissive) {
+ switch (size) {
+ case 1:
+ err = pci_write_config_byte(dev, offset,
+ (u8) value);
+ break;
+ case 2:
+ err = pci_write_config_word(dev, offset,
+ (u16) value);
+ break;
+ case 4:
+ err = pci_write_config_dword(dev, offset,
+ (u32) value);
+ break;
+ }
+ } else if (!dev_data->warned_on_write) {
+ /* Warn only once per device. */
+ dev_data->warned_on_write = 1;
+ dev_warn(&dev->dev, "Driver tried to write to a "
+ "read-only configuration space field at offset"
+ " 0x%x, size %d. This may be harmless, but if "
+ "you have problems with your device:\n"
+ "1) see permissive attribute in sysfs\n"
+ "2) report problems to the xen-devel "
+ "mailing list along with details of your "
+ "device obtained from lspci.\n", offset, size);
+ }
+ }
+
+ return xen_pcibios_err_to_errno(err);
+}
+
+/*
+ * Drop every virtual field whose descriptor provides a clean() hook.
+ *
+ * For each such entry the hook is invoked on the descriptor, the entry's
+ * private data is freed, and the entry is unlinked and freed. Entries
+ * without a clean() hook are left on the list.
+ */
+void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
+{
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *entry, *next;
+
+	dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
+			   "configuration space fields\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry_safe(entry, next, &dev_data->config_fields, list) {
+		const struct config_field *fld = entry->field;
+
+		if (!fld->clean)
+			continue;
+
+		fld->clean((struct config_field *)fld);
+		kfree(entry->data);
+		list_del(&entry->list);
+		kfree(entry);
+	}
+}
+
+/* Call the reset() hook, where present, of every virtual field of @dev. */
+void xen_pcibk_config_reset_dev(struct pci_dev *dev)
+{
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+	const struct config_field_entry *entry;
+
+	dev_dbg(&dev->dev, "resetting virtual configuration space\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry(entry, &dev_data->config_fields, list) {
+		if (!entry->field->reset)
+			continue;
+
+		entry->field->reset(dev, OFFSET(entry), entry->data);
+	}
+}
+
+/*
+ * Tear down all virtual fields of @dev: each entry is unlinked, its
+ * release() hook (if any) is called with the entry's private data, and the
+ * entry itself is freed.
+ */
+void xen_pcibk_config_free_dev(struct pci_dev *dev)
+{
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *entry, *next;
+
+	dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry_safe(entry, next, &dev_data->config_fields, list) {
+		const struct config_field *fld = entry->field;
+
+		list_del(&entry->list);
+
+		if (fld->release)
+			fld->release(dev, OFFSET(entry), entry->data);
+
+		kfree(entry);
+	}
+}
+
+/*
+ * Register @field for @dev at config-space offset @base_offset +
+ * @field->offset.
+ *
+ * A list entry is allocated, the field's init() hook (if any) is run and
+ * its result stored as the entry's private data, and the entry is appended
+ * to the device's field list.
+ *
+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, the
+ * PTR_ERR() of an error pointer returned by init(), or the non-zero result
+ * of xen_pcibk_field_is_dup() when a field is already registered at the
+ * same offset. On any non-zero return the entry is freed and nothing is
+ * added.
+ */
+int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
+ const struct config_field *field,
+ unsigned int base_offset)
+{
+ int err = 0;
+ struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+ struct config_field_entry *cfg_entry;
+ void *tmp;
+
+ cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
+ if (!cfg_entry) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ cfg_entry->data = NULL;
+ cfg_entry->field = field;
+ cfg_entry->base_offset = base_offset;
+
+ /* Duplicate fields are not added and nothing is logged, but the
+ * non-zero duplicate indication is still returned to the caller. */
+ err = xen_pcibk_field_is_dup(dev, OFFSET(cfg_entry));
+ if (err)
+ goto out;
+
+ if (field->init) {
+ tmp = field->init(dev, OFFSET(cfg_entry));
+
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ goto out;
+ }
+
+ cfg_entry->data = tmp;
+ }
+
+ dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
+ OFFSET(cfg_entry));
+ list_add_tail(&cfg_entry->list, &dev_data->config_fields);
+
+out:
+ /* kfree(NULL) is a no-op, so this also covers the allocation failure */
+ if (err)
+ kfree(cfg_entry);
+
+ return err;
+}
+
+/*
+ * Set up the virtual configuration space of @dev so that certain registers
+ * (such as the base address registers, BARs) are tracked here and the
+ * client cannot manipulate them directly.
+ *
+ * The field list is initialised, then header fields, capability fields and
+ * quirks are added in that order. Returns 0 or the first error hit.
+ */
+int xen_pcibk_config_init_dev(struct pci_dev *dev)
+{
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+	int err;
+
+	dev_dbg(&dev->dev, "initializing virtual configuration space\n");
+
+	INIT_LIST_HEAD(&dev_data->config_fields);
+
+	err = xen_pcibk_config_header_add_fields(dev);
+	if (err)
+		return err;
+
+	err = xen_pcibk_config_capability_add_fields(dev);
+	if (err)
+		return err;
+
+	return xen_pcibk_config_quirks_init(dev);
+}
+
+/* Initialise the configuration-space code; this only runs
+ * xen_pcibk_config_capability_init() and returns its result. */
+int xen_pcibk_config_init(void)
+{
+ return xen_pcibk_config_capability_init();
+}
diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h
new file mode 100644
index 000000000..22db63071
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PCI Backend - Common data structures for overriding the configuration space
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#ifndef __XEN_PCIBACK_CONF_SPACE_H__
+#define __XEN_PCIBACK_CONF_SPACE_H__
+
+#include <linux/list.h>
+#include <linux/err.h>
+
+/*
+ * Per-field lifecycle hooks. They receive the device and the field's
+ * absolute config-space offset; reset and free additionally receive the
+ * private data pointer that init returned.
+ *
+ * conf_field_init can return an errno in a ptr with ERR_PTR()
+ */
+typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
+typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
+typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
+
+/* Write handlers, one type per access width (dword, word, byte). */
+typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
+ void *data);
+typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
+ void *data);
+typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
+ void *data);
+/* Read handlers, one type per access width; the result goes to *value. */
+typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
+ void *data);
+typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
+ void *data);
+typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
+ void *data);
+
+/* These are the fields within the configuration space which we
+ * are interested in intercepting reads/writes to and changing their
+ * values.
+ *
+ * offset:  position of the field, relative to the base offset of the
+ *          config_field_entry that refers to it (see OFFSET()).
+ * size:    width in bytes (1, 2 or 4); selects which member of the union
+ *          'u' holds the read/write handlers. Arrays of fields are
+ *          terminated by an entry with size == 0.
+ * mask:    not referenced by the code visible here.
+ * init:    optional; run when the field is added to a device, its return
+ *          value becomes the entry's private data.
+ * reset:   optional; run by xen_pcibk_config_reset_dev().
+ * release: optional; run by xen_pcibk_config_free_dev().
+ * clean:   optional; run by xen_pcibk_config_free_dyn_fields(), which
+ *          removes only fields that set this hook.
+ */
+struct config_field {
+ unsigned int offset;
+ unsigned int size;
+ unsigned int mask;
+ conf_field_init init;
+ conf_field_reset reset;
+ conf_field_free release;
+ void (*clean) (struct config_field *field);
+ union {
+ struct {
+ conf_dword_write write;
+ conf_dword_read read;
+ } dw;
+ struct {
+ conf_word_write write;
+ conf_word_read read;
+ } w;
+ struct {
+ conf_byte_write write;
+ conf_byte_read read;
+ } b;
+ } u;
+ struct list_head list;
+};
+
+/*
+ * One field registered for one device: links a field descriptor into the
+ * device's config_fields list, together with the base offset it was added
+ * at and the private data returned by the field's init() hook (NULL when
+ * the field has no init() hook).
+ */
+struct config_field_entry {
+ struct list_head list;
+ const struct config_field *field;
+ unsigned int base_offset;
+ void *data;
+};
+
+extern bool xen_pcibk_permissive;
+
+/* Absolute config-space offset of an entry: its base offset plus the
+ * offset stored in its field descriptor. Evaluates cfg_entry twice. */
+#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
+
+/* Add fields to a device.
+ *
+ * xen_pcibk_config_add_field_offset() registers a single field at
+ * offset + field->offset. The *_add_fields*() helpers below expect a
+ * pointer to the first entry in an array (of which the ending is marked by
+ * size==0).
+ */
+int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
+ const struct config_field *field,
+ unsigned int offset);
+
+/* Register a single field with a base offset of 0. */
+static inline int xen_pcibk_config_add_field(struct pci_dev *dev,
+ const struct config_field *field)
+{
+ return xen_pcibk_config_add_field_offset(dev, field, 0);
+}
+
+/*
+ * Register every entry of the array starting at @field (terminated by an
+ * entry whose size is 0) with a base offset of 0. Stops at, and returns,
+ * the first non-zero result; returns 0 if every entry was added.
+ */
+static inline int xen_pcibk_config_add_fields(struct pci_dev *dev,
+					      const struct config_field *field)
+{
+	const struct config_field *cur;
+
+	for (cur = field; cur->size != 0; cur++) {
+		int err = xen_pcibk_config_add_field(dev, cur);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Register every entry of the array starting at @field (terminated by an
+ * entry whose size is 0) relative to base offset @offset. Stops at, and
+ * returns, the first non-zero result; returns 0 if every entry was added.
+ */
+static inline int xen_pcibk_config_add_fields_offset(struct pci_dev *dev,
+					const struct config_field *field,
+					unsigned int offset)
+{
+	const struct config_field *cur;
+
+	for (cur = field; cur->size != 0; cur++) {
+		int err = xen_pcibk_config_add_field_offset(dev, cur, offset);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Read/Write the real configuration space */
+int xen_pcibk_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
+ void *data);
+int xen_pcibk_read_config_word(struct pci_dev *dev, int offset, u16 *value,
+ void *data);
+int xen_pcibk_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
+ void *data);
+int xen_pcibk_write_config_byte(struct pci_dev *dev, int offset, u8 value,
+ void *data);
+int xen_pcibk_write_config_word(struct pci_dev *dev, int offset, u16 value,
+ void *data);
+int xen_pcibk_write_config_dword(struct pci_dev *dev, int offset, u32 value,
+ void *data);
+
+int xen_pcibk_config_capability_init(void);
+
+int xen_pcibk_config_header_add_fields(struct pci_dev *dev);
+int xen_pcibk_config_capability_add_fields(struct pci_dev *dev);
+
+#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c
new file mode 100644
index 000000000..42f0f64fc
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_capability.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Backend - Handles the virtual fields found on the capability lists
+ * in the configuration space.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+
+/* All capabilities this file knows how to virtualise (see
+ * register_capability()). */
+static LIST_HEAD(capabilities);
+/* Describes one PCI capability and the virtual fields that go with it. */
+struct xen_pcibk_config_capability {
+ struct list_head cap_list;
+
+ /* Capability ID handed to pci_find_capability() */
+ int capability;
+
+ /* If the device has the capability found above, add these fields */
+ const struct config_field *fields;
+};
+
+/* Field added at the start of every recognised capability: the 2-byte
+ * capability header is readable from the device, and its write handler is
+ * NULL so writes to it have no effect. */
+static const struct config_field caplist_header[] = {
+ {
+ .offset = PCI_CAP_LIST_ID,
+ .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
+ .u.w.read = xen_pcibk_read_config_word,
+ .u.w.write = NULL,
+ },
+ {}
+};
+
+/* Append @cap to the list walked by
+ * xen_pcibk_config_capability_add_fields(). */
+static inline void register_capability(struct xen_pcibk_config_capability *cap)
+{
+ list_add_tail(&cap->cap_list, &capabilities);
+}
+
+/*
+ * For every registered capability that @dev actually has, add the generic
+ * capability header field and the capability's own fields, both relative
+ * to the offset at which the capability was found.
+ *
+ * Returns 0 on success or the first non-zero result from adding fields.
+ */
+int xen_pcibk_config_capability_add_fields(struct pci_dev *dev)
+{
+	struct xen_pcibk_config_capability *cap;
+
+	list_for_each_entry(cap, &capabilities, cap_list) {
+		int cap_offset = pci_find_capability(dev, cap->capability);
+		int err;
+
+		if (!cap_offset)
+			continue;
+
+		dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
+			cap->capability, cap_offset);
+
+		err = xen_pcibk_config_add_fields_offset(dev, caplist_header,
+							 cap_offset);
+		if (err)
+			return err;
+
+		err = xen_pcibk_config_add_fields_offset(dev, cap->fields,
+							 cap_offset);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Write handler for the VPD address register: a value with the
+ * PCI_VPD_ADDR_F flag set is refused with PCIBIOS_SET_FAILED, anything
+ * else is written through to the device.
+ */
+static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
+			     void *data)
+{
+	/* Disallow writes to the vital product data */
+	if (value & PCI_VPD_ADDR_F)
+		return PCIBIOS_SET_FAILED;
+
+	return pci_write_config_word(dev, offset, value);
+}
+
+/* Virtual fields of the VPD capability: the address register is writable
+ * through vpd_address_write(), the data register has no write handler. */
+static const struct config_field caplist_vpd[] = {
+ {
+ .offset = PCI_VPD_ADDR,
+ .size = 2,
+ .u.w.read = xen_pcibk_read_config_word,
+ .u.w.write = vpd_address_write,
+ },
+ {
+ .offset = PCI_VPD_DATA,
+ .size = 4,
+ .u.dw.read = xen_pcibk_read_config_dword,
+ .u.dw.write = NULL,
+ },
+ {}
+};
+
+/*
+ * Read handler for the PM capabilities register: returns the device's
+ * value with the PCI_PM_CAP_PME_MASK bits cleared. On a read error *value
+ * is left untouched and the error is returned.
+ */
+static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
+			void *data)
+{
+	u16 raw;
+	int err = pci_read_config_word(dev, offset, &raw);
+
+	if (!err)
+		*value = raw & ~PCI_PM_CAP_PME_MASK;
+
+	return err;
+}
+
+/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
+ * Can't allow driver domain to enable PMEs - they're shared */
+#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
+
+/*
+ * Write handler for the PM control/status register.
+ *
+ * Only the PM_OK_BITS of @new_value are written to the register; all other
+ * bits keep their current device value. The requested power state is taken
+ * from the state bits of @new_value and applied via pci_set_power_state().
+ *
+ * Returns 0 on success, the error of the config read/write, or
+ * PCIBIOS_SET_FAILED if the power state change fails.
+ */
+static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
+ void *data)
+{
+ int err;
+ u16 old_value;
+ pci_power_t new_state;
+
+ err = pci_read_config_word(dev, offset, &old_value);
+ if (err)
+ goto out;
+
+ /* Extract the requested state before new_value is masked below */
+ new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
+
+ new_value &= PM_OK_BITS;
+ /* Skip the register write when no permitted bit changes */
+ if ((old_value & PM_OK_BITS) != new_value) {
+ new_value = (old_value & ~PM_OK_BITS) | new_value;
+ err = pci_write_config_word(dev, offset, new_value);
+ if (err)
+ goto out;
+ }
+
+ /* Let pci core handle the power management change */
+ dev_dbg(&dev->dev, "set power state to %x\n", new_state);
+ err = pci_set_power_state(dev, new_state);
+ if (err) {
+ err = PCIBIOS_SET_FAILED;
+ goto out;
+ }
+
+ out:
+ return err;
+}
+
+/*
+ * Init hook for the PM control/status field: ensure PMEs are disabled.
+ *
+ * Clears PCI_PM_CTRL_PME_ENABLE in the device register if it is set. No
+ * private data is kept for this field, so NULL is returned on success.
+ *
+ * The config accessors report failure with positive PCIBIOS_* codes, which
+ * IS_ERR() does not recognise when wrapped in ERR_PTR(). The code is
+ * therefore converted to a negative errno first so that the caller sees
+ * the failure instead of storing a bogus data pointer.
+ */
+static void *pm_ctrl_init(struct pci_dev *dev, int offset)
+{
+	int err;
+	u16 value;
+
+	err = pci_read_config_word(dev, offset, &value);
+	if (err)
+		goto out;
+
+	if (value & PCI_PM_CTRL_PME_ENABLE) {
+		value &= ~PCI_PM_CTRL_PME_ENABLE;
+		err = pci_write_config_word(dev, offset, value);
+	}
+
+out:
+	return err ? ERR_PTR(pcibios_err_to_errno(err)) : NULL;
+}
+
+/* Virtual fields of the power management capability. Only PCI_PM_CTRL has
+ * a write handler; the other registers are read-only here. */
+static const struct config_field caplist_pm[] = {
+ {
+ .offset = PCI_PM_PMC,
+ .size = 2,
+ .u.w.read = pm_caps_read,
+ },
+ {
+ .offset = PCI_PM_CTRL,
+ .size = 2,
+ .init = pm_ctrl_init,
+ .u.w.read = xen_pcibk_read_config_word,
+ .u.w.write = pm_ctrl_write,
+ },
+ {
+ .offset = PCI_PM_PPB_EXTENSIONS,
+ .size = 1,
+ .u.b.read = xen_pcibk_read_config_byte,
+ },
+ {
+ .offset = PCI_PM_DATA_REGISTER,
+ .size = 1,
+ .u.b.read = xen_pcibk_read_config_byte,
+ },
+ {}
+};
+
+/* Capability descriptors tying a capability ID to its field table. */
+static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = {
+ .capability = PCI_CAP_ID_PM,
+ .fields = caplist_pm,
+};
+static struct xen_pcibk_config_capability xen_pcibk_config_capability_vpd = {
+ .capability = PCI_CAP_ID_VPD,
+ .fields = caplist_vpd,
+};
+
+/* Register the VPD and PM capability descriptors (in that order) on the
+ * capabilities list. Always returns 0. */
+int xen_pcibk_config_capability_init(void)
+{
+ register_capability(&xen_pcibk_config_capability_vpd);
+ register_capability(&xen_pcibk_config_capability_pm);
+
+ return 0;
+}
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
new file mode 100644
index 000000000..10ae24b5a
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Backend - Handles the virtual fields in the configuration space headers.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+
+/* Private data of the PCI_COMMAND field: the command value last recorded
+ * by command_init() or command_write(). */
+struct pci_cmd_info {
+ u16 val;
+};
+
+/*
+ * Private data of a BAR/ROM field.
+ * val:     value reported for a normal read of the register
+ * len_val: value reported while the guest is sizing the register
+ * which:   non-zero selects len_val, zero selects val (see bar_read())
+ */
+struct pci_bar_info {
+ u32 val;
+ u32 len_val;
+ int which;
+};
+
+/* Non-zero when a command value has memory or I/O decoding enabled. */
+#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
+/* Non-zero when a command value has bus mastering enabled. */
+#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
+
+/* Bits guests are allowed to control in permissive mode. */
+#define PCI_COMMAND_GUEST (PCI_COMMAND_MASTER|PCI_COMMAND_SPECIAL| \
+ PCI_COMMAND_INVALIDATE|PCI_COMMAND_VGA_PALETTE| \
+ PCI_COMMAND_WAIT|PCI_COMMAND_FAST_BACK)
+
+/*
+ * Init hook for the PCI_COMMAND field.
+ *
+ * Allocates the field's private struct pci_cmd_info and seeds it with the
+ * current value of the device's command register.
+ *
+ * Returns the allocated data, ERR_PTR(-ENOMEM) on allocation failure, or an
+ * ERR_PTR() carrying a negative errno if the register cannot be read.
+ *
+ * pci_read_config_word() reports failure with a positive PCIBIOS_* code.
+ * Wrapping that directly in ERR_PTR() yields a pointer that IS_ERR() does
+ * not recognise, so the caller would keep it as field data and the
+ * command_read()/command_write() handlers and the release hook would later
+ * dereference or free it. Convert to a negative errno first.
+ */
+static void *command_init(struct pci_dev *dev, int offset)
+{
+	struct pci_cmd_info *cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+	int err;
+
+	if (!cmd)
+		return ERR_PTR(-ENOMEM);
+
+	err = pci_read_config_word(dev, PCI_COMMAND, &cmd->val);
+	if (err) {
+		kfree(cmd);
+		return ERR_PTR(pcibios_err_to_errno(err));
+	}
+
+	return cmd;
+}
+
+/*
+ * Read handler for PCI_COMMAND: the bits in PCI_COMMAND_GUEST come from the
+ * device register, every other bit comes from the value recorded in the
+ * field's private data. Returns the result of the device read.
+ */
+static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
+{
+	const struct pci_cmd_info *cmd = data;
+	int ret = pci_read_config_word(dev, offset, value);
+
+	*value = (*value & PCI_COMMAND_GUEST) |
+		 (cmd->val & ~PCI_COMMAND_GUEST);
+
+	return ret;
+}
+
+/*
+ * Write handler for PCI_COMMAND.
+ *
+ * Instead of writing the guest's value straight to the register, the
+ * requested state is applied through the PCI core: enabling/disabling the
+ * device, setting/clearing bus mastering and setting/clearing
+ * memory-write-invalidate. The resulting value is recorded in the field's
+ * private data. Only in permissive mode are the PCI_COMMAND_GUEST bits
+ * additionally written to the register itself.
+ *
+ * Returns 0 on success, the error from pci_enable_device(), or the error
+ * of the final config read/write in permissive mode.
+ */
+static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+{
+ struct xen_pcibk_dev_data *dev_data;
+ int err;
+ u16 val;
+ struct pci_cmd_info *cmd = data;
+
+ dev_data = pci_get_drvdata(dev);
+ /* Memory/IO decoding follows the device enable state */
+ if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: enable\n",
+ pci_name(dev));
+ err = pci_enable_device(dev);
+ if (err)
+ return err;
+ if (dev_data)
+ dev_data->enable_intx = 1;
+ } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: disable\n",
+ pci_name(dev));
+ pci_disable_device(dev);
+ if (dev_data)
+ dev_data->enable_intx = 0;
+ }
+
+ /* Bus mastering */
+ if (!dev->is_busmaster && is_master_cmd(value)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n",
+ pci_name(dev));
+ pci_set_master(dev);
+ } else if (dev->is_busmaster && !is_master_cmd(value)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: clear bus master\n",
+ pci_name(dev));
+ pci_clear_master(dev);
+ }
+
+ /* Memory-write-invalidate, compared against the recorded value */
+ if (!(cmd->val & PCI_COMMAND_INVALIDATE) &&
+ (value & PCI_COMMAND_INVALIDATE)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG
+ DRV_NAME ": %s: enable memory-write-invalidate\n",
+ pci_name(dev));
+ err = pci_set_mwi(dev);
+ if (err) {
+ pr_warn("%s: cannot enable memory-write-invalidate (%d)\n",
+ pci_name(dev), err);
+ /* Not fatal: just do not record the bit as set */
+ value &= ~PCI_COMMAND_INVALIDATE;
+ }
+ } else if ((cmd->val & PCI_COMMAND_INVALIDATE) &&
+ !(value & PCI_COMMAND_INVALIDATE)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG
+ DRV_NAME ": %s: disable memory-write-invalidate\n",
+ pci_name(dev));
+ pci_clear_mwi(dev);
+ }
+
+ cmd->val = value;
+
+ /* Outside permissive mode nothing is written to the register here */
+ if (!xen_pcibk_permissive && (!dev_data || !dev_data->permissive))
+ return 0;
+
+ /* Only allow the guest to control certain bits. */
+ err = pci_read_config_word(dev, offset, &val);
+ if (err || val == value)
+ return err;
+
+ value &= PCI_COMMAND_GUEST;
+ value |= val & ~PCI_COMMAND_GUEST;
+
+ return pci_write_config_word(dev, offset, value);
+}
+
+/*
+ * Write handler for the expansion ROM address register.
+ *
+ * A write with all address bits set (everything covered by
+ * PCI_ROM_ADDRESS_MASK) is treated as a sizing probe and switches
+ * subsequent reads to the length value. Any other write switches reads
+ * back to the address value; the device register is only written when the
+ * guest restores exactly the recorded address and the register currently
+ * differs from it.
+ *
+ * Returns 0, or XEN_PCI_ERR_op_failed if the field has no private data.
+ */
+static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
+{
+	struct pci_bar_info *bar = data;
+
+	if (unlikely(!bar)) {
+		/*
+		 * dev_warn() rather than pr_warn(DRV_NAME ...): this file's
+		 * pr_fmt already prepends the module name, so the message
+		 * used to carry the driver prefix twice.
+		 */
+		dev_warn(&dev->dev, "driver data not found\n");
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	/* A write to obtain the length must happen as a 32-bit write.
+	 * This does not (yet) support writing individual bytes
+	 */
+	if ((value | ~PCI_ROM_ADDRESS_MASK) == ~0U)
+		bar->which = 1;
+	else {
+		u32 tmpval;
+		pci_read_config_dword(dev, offset, &tmpval);
+		if (tmpval != bar->val && value == bar->val) {
+			/* Allow restoration of bar value. */
+			pci_write_config_dword(dev, offset, bar->val);
+		}
+		bar->which = 0;
+	}
+
+	/* Do we need to support enabling/disabling the rom address here? */
+
+	return 0;
+}
+
+/* For the BARs, only allow writes which write ~0 or
+ * the correct resource information
+ * (Needed for when the driver probes the resource usage)
+ *
+ * A write with all address bits set is treated as a sizing probe and
+ * switches subsequent reads to the length value. Any other write switches
+ * reads back to the address value; the device register is only written
+ * when the guest restores exactly the recorded value and the register
+ * currently differs from it.
+ *
+ * Returns 0, or XEN_PCI_ERR_op_failed if the field has no private data.
+ */
+static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
+{
+	struct pci_bar_info *bar = data;
+	unsigned int pos = (offset - PCI_BASE_ADDRESS_0) / 4;
+	const struct resource *res = dev->resource;
+	u32 mask;
+
+	if (unlikely(!bar)) {
+		/*
+		 * dev_warn() rather than pr_warn(DRV_NAME ...): this file's
+		 * pr_fmt already prepends the module name, so the message
+		 * used to carry the driver prefix twice.
+		 */
+		dev_warn(&dev->dev, "driver data not found\n");
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	/* A write to obtain the length must happen as a 32-bit write.
+	 * This does not (yet) support writing individual bytes
+	 */
+	if (res[pos].flags & IORESOURCE_IO)
+		mask = ~PCI_BASE_ADDRESS_IO_MASK;
+	else if (pos && (res[pos - 1].flags & IORESOURCE_MEM_64))
+		/* upper half of a 64-bit BAR: every bit is an address bit */
+		mask = 0;
+	else
+		mask = ~PCI_BASE_ADDRESS_MEM_MASK;
+	if ((value | mask) == ~0U)
+		bar->which = 1;
+	else {
+		u32 tmpval;
+		pci_read_config_dword(dev, offset, &tmpval);
+		if (tmpval != bar->val && value == bar->val) {
+			/* Allow restoration of bar value. */
+			pci_write_config_dword(dev, offset, bar->val);
+		}
+		bar->which = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Read handler for BAR/ROM registers: reports the recorded length value
+ * while a sizing probe is in progress (bar->which set), otherwise the
+ * recorded address value. The device is not accessed.
+ *
+ * Returns 0, or XEN_PCI_ERR_op_failed if the field has no private data.
+ */
+static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
+{
+	struct pci_bar_info *bar = data;
+
+	if (unlikely(!bar)) {
+		/*
+		 * dev_warn() rather than pr_warn(DRV_NAME ...): this file's
+		 * pr_fmt already prepends the module name, so the message
+		 * used to carry the driver prefix twice.
+		 */
+		dev_warn(&dev->dev, "driver data not found\n");
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	*value = bar->which ? bar->len_val : bar->val;
+
+	return 0;
+}
+
+/*
+ * Init hook for BAR and ROM fields.
+ *
+ * Allocates a zeroed struct pci_bar_info and fills in the address value
+ * and the length value from the device's resource table. For the register
+ * following a 64-bit memory BAR, the upper 32 bits of that BAR's start and
+ * negated size are recorded. Resources without flags, or flagged disabled,
+ * unset or busy, leave both values at 0.
+ *
+ * Returns the allocated data or ERR_PTR(-ENOMEM).
+ */
+static void *bar_init(struct pci_dev *dev, int offset)
+{
+	unsigned int pos;
+	const struct resource *res = dev->resource;
+	struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL);
+
+	if (!bar)
+		return ERR_PTR(-ENOMEM);
+
+	if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
+		pos = PCI_ROM_RESOURCE;
+	else {
+		pos = (offset - PCI_BASE_ADDRESS_0) / 4;
+		if (pos && (res[pos - 1].flags & IORESOURCE_MEM_64)) {
+			/*
+			 * Use ">> 16 >> 16" instead of a direct ">> 32":
+			 * where resource_size_t is only 32 bits wide, a
+			 * shift by the full type width is not valid.
+			 */
+			bar->val = res[pos - 1].start >> 16 >> 16;
+			bar->len_val = -resource_size(&res[pos - 1]) >> 16 >> 16;
+			return bar;
+		}
+	}
+
+	if (!res[pos].flags ||
+	    (res[pos].flags & (IORESOURCE_DISABLED | IORESOURCE_UNSET |
+			       IORESOURCE_BUSY)))
+		return bar;
+
+	bar->val = res[pos].start |
+		   (res[pos].flags & PCI_REGION_FLAG_MASK);
+	bar->len_val = -resource_size(&res[pos]) |
+		       (res[pos].flags & PCI_REGION_FLAG_MASK);
+
+	return bar;
+}
+
+/* Reset hook for BAR/ROM fields: go back to reporting the address value
+ * rather than the length value. */
+static void bar_reset(struct pci_dev *dev, int offset, void *data)
+{
+ struct pci_bar_info *bar = data;
+
+ bar->which = 0;
+}
+
+/* Release hook: frees the field's private data. It is also used for the
+ * PCI_COMMAND field in header_common. */
+static void bar_release(struct pci_dev *dev, int offset, void *data)
+{
+ kfree(data);
+}
+
+/* Read handler for PCI_VENDOR_ID: reports dev->vendor without touching
+ * the device. */
+static int xen_pcibk_read_vendor(struct pci_dev *dev, int offset,
+ u16 *value, void *data)
+{
+ *value = dev->vendor;
+
+ return 0;
+}
+
+/* Read handler for PCI_DEVICE_ID: reports dev->device without touching
+ * the device. */
+static int xen_pcibk_read_device(struct pci_dev *dev, int offset,
+ u16 *value, void *data)
+{
+ *value = dev->device;
+
+ return 0;
+}
+
+/* Read handler for PCI_INTERRUPT_LINE: reports dev->irq truncated to
+ * 8 bits. */
+static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
+ void *data)
+{
+ *value = (u8) dev->irq;
+
+ return 0;
+}
+
+/*
+ * Write handler for PCI_BIST.
+ *
+ * The write reaches the device only if it leaves every bit other than
+ * PCI_BIST_START unchanged, or if the value is exactly PCI_BIST_START.
+ * Other writes are dropped and 0 is returned. A failure to read the
+ * current value is returned as is.
+ */
+static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
+{
+	u8 current_bist;
+	int err = pci_read_config_byte(dev, offset, &current_bist);
+
+	if (err)
+		return err;
+
+	if ((current_bist & ~PCI_BIST_START) != (value & ~PCI_BIST_START) &&
+	    value != PCI_BIST_START)
+		return 0;
+
+	return pci_write_config_byte(dev, offset, value);
+}
+
+/*
+ * Virtual fields added for every header type. Fields that list only a
+ * read handler have no write handler, so writes routed to them are
+ * ignored.
+ */
+static const struct config_field header_common[] = {
+ {
+ .offset = PCI_VENDOR_ID,
+ .size = 2,
+ .u.w.read = xen_pcibk_read_vendor,
+ },
+ {
+ .offset = PCI_DEVICE_ID,
+ .size = 2,
+ .u.w.read = xen_pcibk_read_device,
+ },
+ {
+ .offset = PCI_COMMAND,
+ .size = 2,
+ .init = command_init,
+ .release = bar_release,
+ .u.w.read = command_read,
+ .u.w.write = command_write,
+ },
+ {
+ .offset = PCI_INTERRUPT_LINE,
+ .size = 1,
+ .u.b.read = interrupt_read,
+ },
+ {
+ .offset = PCI_INTERRUPT_PIN,
+ .size = 1,
+ .u.b.read = xen_pcibk_read_config_byte,
+ },
+ {
+ /* Any side effects of letting driver domain control cache line? */
+ .offset = PCI_CACHE_LINE_SIZE,
+ .size = 1,
+ .u.b.read = xen_pcibk_read_config_byte,
+ .u.b.write = xen_pcibk_write_config_byte,
+ },
+ {
+ .offset = PCI_LATENCY_TIMER,
+ .size = 1,
+ .u.b.read = xen_pcibk_read_config_byte,
+ },
+ {
+ .offset = PCI_BIST,
+ .size = 1,
+ .u.b.read = xen_pcibk_read_config_byte,
+ .u.b.write = bist_write,
+ },
+ {}
+};
+
+/* Initialiser for a 32-bit BAR field at reg_offset, backed by the bar_*
+ * handlers. */
+#define CFG_FIELD_BAR(reg_offset) \
+ { \
+ .offset = reg_offset, \
+ .size = 4, \
+ .init = bar_init, \
+ .reset = bar_reset, \
+ .release = bar_release, \
+ .u.dw.read = bar_read, \
+ .u.dw.write = bar_write, \
+ }
+
+/* Same as CFG_FIELD_BAR but for the expansion ROM register: writes go
+ * through rom_write() instead of bar_write(). */
+#define CFG_FIELD_ROM(reg_offset) \
+ { \
+ .offset = reg_offset, \
+ .size = 4, \
+ .init = bar_init, \
+ .reset = bar_reset, \
+ .release = bar_release, \
+ .u.dw.read = bar_read, \
+ .u.dw.write = rom_write, \
+ }
+
+/* BAR/ROM fields added for PCI_HEADER_TYPE_NORMAL devices. */
+static const struct config_field header_0[] = {
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
+ CFG_FIELD_ROM(PCI_ROM_ADDRESS),
+ {}
+};
+
+/* BAR/ROM fields added for PCI_HEADER_TYPE_BRIDGE devices. */
+static const struct config_field header_1[] = {
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
+ CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
+ {}
+};
+
+/*
+ * Add the header fields of @dev: the common fields first, then the BAR/ROM
+ * set that matches the device's header type.
+ *
+ * Returns 0 on success, the first non-zero result from adding fields, or
+ * -EINVAL (after logging an error) for any other header type.
+ */
+int xen_pcibk_config_header_add_fields(struct pci_dev *dev)
+{
+	int err = xen_pcibk_config_add_fields(dev, header_common);
+
+	if (err)
+		return err;
+
+	switch (dev->hdr_type) {
+	case PCI_HEADER_TYPE_NORMAL:
+		return xen_pcibk_config_add_fields(dev, header_0);
+
+	case PCI_HEADER_TYPE_BRIDGE:
+		return xen_pcibk_config_add_fields(dev, header_1);
+
+	default:
+		pr_err("%s: Unsupported header type %d!\n",
+		       pci_name(dev), dev->hdr_type);
+		return -EINVAL;
+	}
+}
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c
new file mode 100644
index 000000000..ed593d104
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_quirks.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Backend - Handle special overlays for broken devices.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ * Author: Chris Bookholt <hap10@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+LIST_HEAD(xen_pcibk_quirks);
+/*
+ * Return @id if it matches @dev, NULL otherwise.  Each of vendor, device,
+ * subvendor and subdevice matches when it is PCI_ANY_ID or equal to the
+ * device's value; the class matches when it agrees with the device's class
+ * on every bit set in class_mask.
+ */
+static inline const struct pci_device_id *
+match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
+{
+	if (id->vendor != PCI_ANY_ID && id->vendor != dev->vendor)
+		return NULL;
+
+	if (id->device != PCI_ANY_ID && id->device != dev->device)
+		return NULL;
+
+	if (id->subvendor != PCI_ANY_ID &&
+	    id->subvendor != dev->subsystem_vendor)
+		return NULL;
+
+	if (id->subdevice != PCI_ANY_ID &&
+	    id->subdevice != dev->subsystem_device)
+		return NULL;
+
+	if ((id->class ^ dev->class) & id->class_mask)
+		return NULL;
+
+	return id;
+}
+
+/*
+ * Look up the first entry on xen_pcibk_quirks whose device ID matches @dev.
+ * Returns the entry, or NULL (after logging a debug message) if none matches.
+ */
+static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
+{
+	struct xen_pcibk_config_quirk *q;
+
+	list_for_each_entry(q, &xen_pcibk_quirks, quirks_list) {
+		if (match_one_device(&q->devid, dev))
+			return q;
+	}
+
+	printk(KERN_DEBUG DRV_NAME
+	       ": quirk didn't match any device known\n");
+	return NULL;
+}
+
+/* Append @quirk to the tail of the global xen_pcibk_quirks list. */
+static inline void register_quirk(struct xen_pcibk_config_quirk *quirk)
+{
+ list_add_tail(&quirk->quirks_list, &xen_pcibk_quirks);
+}
+
+/*
+ * Report whether @dev already has a config field entry whose offset
+ * equals @reg.  Returns 1 if such an entry exists, 0 otherwise.
+ */
+int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg)
+{
+	struct xen_pcibk_dev_data *data = pci_get_drvdata(dev);
+	struct config_field_entry *entry;
+
+	list_for_each_entry(entry, &data->config_fields, list) {
+		if (OFFSET(entry) == reg)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Install the plain pass-through accessors matching @field's size
+ * (1, 2 or 4 bytes) and register the field with @dev.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported size, or the error
+ * reported by xen_pcibk_config_add_field().  The latter used to be
+ * discarded, so a failed registration was reported as success and the
+ * caller could not tell that @field was never added.
+ */
+int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
+				    *field)
+{
+	switch (field->size) {
+	case 1:
+		field->u.b.read = xen_pcibk_read_config_byte;
+		field->u.b.write = xen_pcibk_write_config_byte;
+		break;
+	case 2:
+		field->u.w.read = xen_pcibk_read_config_word;
+		field->u.w.write = xen_pcibk_write_config_word;
+		break;
+	case 4:
+		field->u.dw.read = xen_pcibk_read_config_dword;
+		field->u.dw.write = xen_pcibk_write_config_dword;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Propagate allocation/registration failures to the caller. */
+	return xen_pcibk_config_add_field(dev, field);
+}
+
+/*
+ * Allocate a quirk record for @dev, fill its device ID from the device's
+ * vendor/device/subsystem IDs (class and class_mask left at 0) and append
+ * it to the global quirk list.
+ *
+ * Returns 0 on success or -ENOMEM if the record cannot be allocated.
+ */
+int xen_pcibk_config_quirks_init(struct pci_dev *dev)
+{
+	struct xen_pcibk_config_quirk *new_quirk;
+
+	new_quirk = kzalloc(sizeof(*new_quirk), GFP_KERNEL);
+	if (!new_quirk)
+		return -ENOMEM;
+
+	new_quirk->pdev = dev;
+
+	new_quirk->devid.vendor = dev->vendor;
+	new_quirk->devid.device = dev->device;
+	new_quirk->devid.subvendor = dev->subsystem_vendor;
+	new_quirk->devid.subdevice = dev->subsystem_device;
+	new_quirk->devid.class = 0;
+	new_quirk->devid.class_mask = 0;
+	new_quirk->devid.driver_data = 0UL;
+
+	register_quirk(new_quirk);
+
+	return 0;
+}
+
+/* Free a config_field with kfree(); passing NULL is harmless. */
+void xen_pcibk_config_field_free(struct config_field *field)
+{
+ kfree(field);
+}
+
+/*
+ * Remove and free the quirk record matching @dev.
+ *
+ * Returns 0 on success or -ENXIO if no record matches the device.
+ */
+int xen_pcibk_config_quirk_release(struct pci_dev *dev)
+{
+	struct xen_pcibk_config_quirk *q = xen_pcibk_find_quirk(dev);
+
+	if (!q)
+		return -ENXIO;
+
+	list_del(&q->quirks_list);
+	kfree(q);
+
+	return 0;
+}
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.h b/drivers/xen/xen-pciback/conf_space_quirks.h
new file mode 100644
index 000000000..d873abe35
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_quirks.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PCI Backend - Data structures for special overlays for broken devices.
+ *
+ * Ryan Wilson <hap9@epoch.ncsc.mil>
+ * Chris Bookholt <hap10@epoch.ncsc.mil>
+ */
+
+#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
+#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
+
+#include <linux/pci.h>
+#include <linux/list.h>
+
+/* One entry on the global xen_pcibk_quirks list. */
+struct xen_pcibk_config_quirk {
+ /* Link in the xen_pcibk_quirks list. */
+ struct list_head quirks_list;
+ /* IDs copied from the device in xen_pcibk_config_quirks_init(). */
+ struct pci_device_id devid;
+ /* Device this record was created for. */
+ struct pci_dev *pdev;
+};
+
+/*
+ * Install the default byte/word/dword accessors on @field according to its
+ * size and add it to @dev.  Returns -EINVAL for sizes other than 1, 2 or 4.
+ */
+int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
+ *field);
+
+/*
+ * NOTE(review): no definition of this function appears in
+ * conf_space_quirks.c - confirm it is implemented elsewhere or drop it.
+ */
+int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg);
+
+/* Allocate and register a quirk record for @dev; -ENOMEM on failure. */
+int xen_pcibk_config_quirks_init(struct pci_dev *dev);
+
+/* kfree() a config_field. */
+void xen_pcibk_config_field_free(struct config_field *field);
+
+/* Unlink and free the quirk record matching @dev; -ENXIO if none. */
+int xen_pcibk_config_quirk_release(struct pci_dev *dev);
+
+/* Return 1 if @dev already has a config field at offset @reg, else 0. */
+int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg);
+
+#endif
diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
new file mode 100644
index 000000000..66e9b814c
--- /dev/null
+++ b/drivers/xen/xen-pciback/passthrough.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Backend - Provides restricted access to the real PCI bus topology
+ * to the frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include "pciback.h"
+
+/* Per-xen_pcibk_device state of the passthrough backend. */
+struct passthrough_dev_data {
+ /* Access to dev_list must be protected by lock */
+ /* List of struct pci_dev_entry, one per exported device. */
+ struct list_head dev_list;
+ struct mutex lock;
+};
+
+/*
+ * Find the exported device whose real domain, bus number and devfn equal
+ * the given values.  The list is walked under dev_data->lock.
+ *
+ * Returns the matching pci_dev or NULL if none is exported.
+ */
+static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
+					       unsigned int domain,
+					       unsigned int bus,
+					       unsigned int devfn)
+{
+	struct passthrough_dev_data *pt_data = pdev->pci_dev_data;
+	struct pci_dev_entry *entry;
+	struct pci_dev *match = NULL;
+
+	mutex_lock(&pt_data->lock);
+
+	list_for_each_entry(entry, &pt_data->dev_list, list) {
+		struct pci_dev *cand = entry->dev;
+
+		if (domain != (unsigned int)pci_domain_nr(cand->bus))
+			continue;
+		if (bus != (unsigned int)cand->bus->number)
+			continue;
+		if (devfn != cand->devfn)
+			continue;
+
+		match = cand;
+		break;
+	}
+
+	mutex_unlock(&pt_data->lock);
+
+	return match;
+}
+
+/*
+ * Record @dev in the backend's device list and publish it through
+ * @publish_cb using the device's real domain/bus/devfn.
+ *
+ * Returns -ENOMEM if the list entry cannot be allocated, otherwise the
+ * value returned by @publish_cb.
+ */
+static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
+				   struct pci_dev *dev,
+				   int devid, publish_pci_dev_cb publish_cb)
+{
+	struct passthrough_dev_data *pt_data = pdev->pci_dev_data;
+	struct pci_dev_entry *entry;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+	entry->dev = dev;
+
+	mutex_lock(&pt_data->lock);
+	list_add_tail(&entry->list, &pt_data->dev_list);
+	mutex_unlock(&pt_data->lock);
+
+	/* Publish this device. */
+	return publish_cb(pdev,
+			  (unsigned int)pci_domain_nr(dev->bus),
+			  (unsigned int)dev->bus->number,
+			  dev->devfn, devid);
+}
+
+/*
+ * Drop every list entry referring to @dev and, if one was found, hand the
+ * device back via pcistub_put_pci_dev().  When @lock is true the device
+ * lock is taken around that call; when false the caller is expected to
+ * hold it already.
+ */
+static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
+					struct pci_dev *dev, bool lock)
+{
+	struct passthrough_dev_data *pt_data = pdev->pci_dev_data;
+	struct pci_dev_entry *entry, *next;
+	struct pci_dev *released = NULL;
+
+	mutex_lock(&pt_data->lock);
+
+	list_for_each_entry_safe(entry, next, &pt_data->dev_list, list) {
+		if (entry->dev != dev)
+			continue;
+
+		list_del(&entry->list);
+		released = entry->dev;
+		kfree(entry);
+	}
+
+	mutex_unlock(&pt_data->lock);
+
+	if (!released)
+		return;
+
+	if (lock)
+		device_lock(&released->dev);
+	pcistub_put_pci_dev(released);
+	if (lock)
+		device_unlock(&released->dev);
+}
+
+/*
+ * Allocate and initialise the passthrough backend's private data (empty
+ * device list plus its mutex) and attach it to @pdev.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
+{
+	struct passthrough_dev_data *pt_data;
+
+	pt_data = kmalloc(sizeof(*pt_data), GFP_KERNEL);
+	if (!pt_data)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&pt_data->dev_list);
+	mutex_init(&pt_data->lock);
+
+	pdev->pci_dev_data = pt_data;
+
+	return 0;
+}
+
+/*
+ * Call @publish_root_cb with the domain and bus number of every exported
+ * device that has no exported parent bridge.  The whole walk runs under
+ * dev_data->lock.
+ *
+ * Returns 0, or the first non-zero value returned by the callback (the walk
+ * stops at that point).
+ */
+static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
+ publish_pci_root_cb publish_root_cb)
+{
+ int err = 0;
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+ struct pci_dev_entry *dev_entry, *e;
+ struct pci_dev *dev;
+ int found;
+ unsigned int domain, bus;
+
+ mutex_lock(&dev_data->lock);
+
+ list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
+ /* Only publish this device as a root if none of its
+ * parent bridges are exported
+ */
+ found = 0;
+ dev = dev_entry->dev->bus->self;
+ /* Walk up through the parent bridges until one is found in
+ * dev_list or the top of the hierarchy is reached.
+ */
+ for (; !found && dev != NULL; dev = dev->bus->self) {
+ list_for_each_entry(e, &dev_data->dev_list, list) {
+ if (dev == e->dev) {
+ found = 1;
+ break;
+ }
+ }
+ }
+
+ domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
+ bus = (unsigned int)dev_entry->dev->bus->number;
+
+ if (!found) {
+ err = publish_root_cb(pdev, domain, bus);
+ if (err)
+ break;
+ }
+ }
+
+ mutex_unlock(&dev_data->lock);
+
+ return err;
+}
+
+/*
+ * Tear down the passthrough backend state of @pdev: return every exported
+ * device through pcistub_put_pci_dev() (with the device lock held around
+ * the call), free the list entries and the private data, and clear
+ * pdev->pci_dev_data.
+ *
+ * NOTE(review): dev_list is walked here without taking dev_data->lock -
+ * presumably no other user can exist at this point; confirm.
+ */
+static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
+{
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+ struct pci_dev_entry *dev_entry, *t;
+
+ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
+ struct pci_dev *dev = dev_entry->dev;
+ list_del(&dev_entry->list);
+ device_lock(&dev->dev);
+ pcistub_put_pci_dev(dev);
+ device_unlock(&dev->dev);
+ kfree(dev_entry);
+ }
+
+ kfree(dev_data);
+ pdev->pci_dev_data = NULL;
+}
+
+/*
+ * Report the address under which @pcidev is visible to the frontend.  In
+ * passthrough mode this is the device's real domain, bus number and devfn.
+ * @pdev is unused.  Always returns 1.
+ */
+static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
+ struct xen_pcibk_device *pdev,
+ unsigned int *domain, unsigned int *bus,
+ unsigned int *devfn)
+{
+ *domain = pci_domain_nr(pcidev->bus);
+ *bus = pcidev->bus->number;
+ *devfn = pcidev->devfn;
+ return 1;
+}
+
+/* Operations table wiring the passthrough implementation into xen-pciback. */
+const struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
+ .name = "passthrough",
+ .init = __xen_pcibk_init_devices,
+ .free = __xen_pcibk_release_devices,
+ .find = __xen_pcibk_get_pcifront_dev,
+ .publish = __xen_pcibk_publish_pci_roots,
+ .release = __xen_pcibk_release_pci_dev,
+ .add = __xen_pcibk_add_pci_dev,
+ .get = __xen_pcibk_get_pci_dev,
+};
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
new file mode 100644
index 000000000..adf3aae29
--- /dev/null
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -0,0 +1,1652 @@
+/*
+ * PCI Stub Driver - Grabs devices in backend to be exported later
+ *
+ * Ryan Wilson <hap9@epoch.ncsc.mil>
+ * Chris Bookholt <hap10@epoch.ncsc.mil>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+#include <linux/pci.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#include <xen/events.h>
+#include <asm/xen/pci.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/interface/physdev.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+#define PCISTUB_DRIVER_NAME "pciback"
+
+static char *pci_devs_to_hide;
+wait_queue_head_t xen_pcibk_aer_wait_queue;
+/* Semaphore synchronizing AER handling with xen_pcibk remove/reconfigure
+ * operations: a xen_pcibk device must not be removed while an AER
+ * operation on it is in progress.
+ */
+static DECLARE_RWSEM(pcistub_sem);
+module_param_named(hide, pci_devs_to_hide, charp, 0444);
+
+/*
+ * One PCI address (domain, bus, devfn) on the pcistub_device_ids list;
+ * pcistub_match() compares probed devices and their parent bridges
+ * against these entries.
+ */
+struct pcistub_device_id {
+ /* Link in pcistub_device_ids. */
+ struct list_head slot_list;
+ int domain;
+ unsigned char bus;
+ unsigned int devfn;
+};
+static LIST_HEAD(pcistub_device_ids);
+static DEFINE_SPINLOCK(device_ids_lock);
+
+/* Reference-counted wrapper around a PCI device seized by the stub driver. */
+struct pcistub_device {
+ /* Released through pcistub_device_release() on the final put. */
+ struct kref kref;
+ /* Link in either seized_devices or pcistub_devices. */
+ struct list_head dev_list;
+ /* Protects pdev. */
+ spinlock_t lock;
+
+ /* Holds a reference obtained with pci_dev_get(). */
+ struct pci_dev *dev;
+ struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */
+};
+
+/* Access to pcistub_devices & seized_devices lists and the initialize_devices
+ * flag must be locked with pcistub_devices_lock
+ */
+static DEFINE_SPINLOCK(pcistub_devices_lock);
+static LIST_HEAD(pcistub_devices);
+
+/* wait for device_initcall before initializing our devices
+ * (see pcistub_init_devices_late)
+ */
+static int initialize_devices;
+static LIST_HEAD(seized_devices);
+
+/*
+ * Allocate a zeroed pcistub_device for @dev, take a reference on the
+ * pci_dev, and initialise the kref and spinlock.
+ *
+ * Returns the new object, or NULL if the allocation fails or
+ * pci_dev_get() returns NULL.
+ */
+static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev;
+
+ dev_dbg(&dev->dev, "pcistub_device_alloc\n");
+
+ psdev = kzalloc(sizeof(*psdev), GFP_KERNEL);
+ if (!psdev)
+ return NULL;
+
+ psdev->dev = pci_dev_get(dev);
+ if (!psdev->dev) {
+ kfree(psdev);
+ return NULL;
+ }
+
+ kref_init(&psdev->kref);
+ spin_lock_init(&psdev->lock);
+
+ return psdev;
+}
+
+/* Don't call this directly as it's called by pcistub_device_put */
+/*
+ * kref release callback for struct pcistub_device.  Unregisters the device's
+ * domain owner, resets the function and restores its config state, asks the
+ * hypervisor to release MSI-X (if the device has that capability), frees the
+ * per-device data, drops the pci_dev reference and frees the pcistub_device.
+ */
+static void pcistub_device_release(struct kref *kref)
+{
+ struct pcistub_device *psdev;
+ struct pci_dev *dev;
+ struct xen_pcibk_dev_data *dev_data;
+
+ psdev = container_of(kref, struct pcistub_device, kref);
+ dev = psdev->dev;
+ dev_data = pci_get_drvdata(dev);
+
+ dev_dbg(&dev->dev, "pcistub_device_release\n");
+
+ xen_unregister_device_domain_owner(dev);
+
+ /* Call the reset function which does not take lock as this
+ * is called from "unbind" which takes a device_lock mutex.
+ */
+ __pci_reset_function_locked(dev);
+ /* With no dev_data, or when the saved state loads successfully (0),
+ * fall through to pci_restore_state().
+ */
+ if (dev_data &&
+ pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state))
+ dev_info(&dev->dev, "Could not reload PCI state\n");
+ else
+ pci_restore_state(dev);
+
+ if (dev->msix_cap) {
+ struct physdev_pci_device ppdev = {
+ .seg = pci_domain_nr(dev->bus),
+ .bus = dev->bus->number,
+ .devfn = dev->devfn
+ };
+ int err = HYPERVISOR_physdev_op(PHYSDEVOP_release_msix,
+ &ppdev);
+
+ /* -ENOSYS is tolerated silently. */
+ if (err && err != -ENOSYS)
+ dev_warn(&dev->dev, "MSI-X release failed (%d)\n",
+ err);
+ }
+
+ /* Disable the device */
+ xen_pcibk_reset_device(dev);
+
+ kfree(dev_data);
+ pci_set_drvdata(dev, NULL);
+
+ /* Clean-up the device */
+ /* NOTE(review): drvdata has just been cleared; if the two helpers
+ * below look up their state through pci_get_drvdata() they will see
+ * NULL here - confirm the intended ordering.
+ */
+ xen_pcibk_config_free_dyn_fields(dev);
+ xen_pcibk_config_free_dev(dev);
+
+ pci_clear_dev_assigned(dev);
+ pci_dev_put(dev);
+
+ kfree(psdev);
+}
+
+/* Take an additional reference on @psdev. */
+static inline void pcistub_device_get(struct pcistub_device *psdev)
+{
+ kref_get(&psdev->kref);
+}
+
+/*
+ * Drop a reference on @psdev; the final put runs pcistub_device_release().
+ */
+static inline void pcistub_device_put(struct pcistub_device *psdev)
+{
+ kref_put(&psdev->kref, pcistub_device_release);
+}
+
+/*
+ * Search pcistub_devices for the entry at domain:bus:slot.func.  No
+ * reference is taken on the result.  Callers in this file invoke it with
+ * pcistub_devices_lock held.
+ *
+ * Returns the matching entry or NULL.
+ */
+static struct pcistub_device *pcistub_device_find_locked(int domain, int bus,
+							  int slot, int func)
+{
+	struct pcistub_device *cur;
+
+	list_for_each_entry(cur, &pcistub_devices, dev_list) {
+		struct pci_dev *pci = cur->dev;
+
+		if (pci == NULL)
+			continue;
+		if (domain != pci_domain_nr(pci->bus))
+			continue;
+		if (bus != pci->bus->number)
+			continue;
+		if (slot != PCI_SLOT(pci->devfn))
+			continue;
+		if (func != PCI_FUNC(pci->devfn))
+			continue;
+
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Locked lookup of the pcistub_device at domain:bus:slot.func.  On success
+ * a reference is taken on the result; the caller must drop it with
+ * pcistub_device_put().  Returns NULL if no such device is known.
+ */
+static struct pcistub_device *pcistub_device_find(int domain, int bus,
+ int slot, int func)
+{
+ struct pcistub_device *psdev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ psdev = pcistub_device_find_locked(domain, bus, slot, func);
+ if (psdev)
+ pcistub_device_get(psdev);
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return psdev;
+}
+
+/*
+ * Try to claim @psdev for @pdev.  If the device is not already in use
+ * (psdev->pdev is NULL), record @pdev as its user and return the pci_dev,
+ * keeping the reference taken on @psdev.  Otherwise drop that reference
+ * again and return NULL.
+ */
+static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev,
+ struct pcistub_device *psdev)
+{
+ struct pci_dev *pci_dev = NULL;
+ unsigned long flags;
+
+ pcistub_device_get(psdev);
+
+ spin_lock_irqsave(&psdev->lock, flags);
+ if (!psdev->pdev) {
+ psdev->pdev = pdev;
+ pci_dev = psdev->dev;
+ }
+ spin_unlock_irqrestore(&psdev->lock, flags);
+
+ /* Already in use by someone else: undo the get above. */
+ if (!pci_dev)
+ pcistub_device_put(psdev);
+
+ return pci_dev;
+}
+
+/*
+ * Claim the seized device at domain:bus:slot.func for @pdev.
+ *
+ * Returns the pci_dev on success, or NULL if the device is unknown or
+ * already in use.
+ */
+struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
+ int domain, int bus,
+ int slot, int func)
+{
+ struct pcistub_device *psdev;
+ struct pci_dev *found_dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ psdev = pcistub_device_find_locked(domain, bus, slot, func);
+ if (psdev)
+ found_dev = pcistub_device_get_pci_dev(pdev, psdev);
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return found_dev;
+}
+
+/*
+ * Claim the seized device @dev for @pdev.
+ *
+ * Returns the pci_dev on success, or NULL if @dev is not on the
+ * pcistub_devices list or is already in use.
+ */
+struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev)
+{
+ struct pcistub_device *psdev;
+ struct pci_dev *found_dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (psdev->dev == dev) {
+ found_dev = pcistub_device_get_pci_dev(pdev, psdev);
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return found_dev;
+}
+
+/*
+ * Called when:
+ * - XenBus state has been reconfigured (pci unplug). See xen_pcibk_remove_device
+ * - XenBus state has been disconnected (guest shutdown). See xen_pcibk_xenbus_remove
+ * - 'echo BDF > unbind' on pciback module with no guest attached. See pcistub_remove
+ * - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
+ *
+ * As such we have to be careful.
+ *
+ * To make this easier, the caller has to hold the device lock.
+ *
+ * Resets the function, restores the config state saved when the device was
+ * bound, resets the emulated config fields, unregisters the domain owner,
+ * marks the device as no longer in use and drops the reference that was
+ * taken when the device was claimed.
+ */
+void pcistub_put_pci_dev(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev, *found_psdev = NULL;
+ unsigned long flags;
+ struct xen_pcibk_dev_data *dev_data;
+ int ret;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (psdev->dev == dev) {
+ found_psdev = psdev;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ if (WARN_ON(!found_psdev))
+ return;
+
+ /* Hold this lock to avoid breaking the link between
+ * pcistub and xen_pcibk while AER is being processed
+ */
+ down_write(&pcistub_sem);
+ /* Cleanup our device
+ * (so it's ready for the next domain)
+ */
+ device_lock_assert(&dev->dev);
+ __pci_reset_function_locked(dev);
+
+ dev_data = pci_get_drvdata(dev);
+ ret = pci_load_saved_state(dev, dev_data->pci_saved_state);
+ if (!ret) {
+ /*
+ * The usual sequence is pci_save_state & pci_restore_state
+ * but the guest might have messed the configuration space up.
+ * Use the initial version (when device was bound to us).
+ */
+ pci_restore_state(dev);
+ } else
+ dev_info(&dev->dev, "Could not reload PCI state\n");
+ /* This disables the device. */
+ xen_pcibk_reset_device(dev);
+
+ /* And clean up our emulated fields. */
+ xen_pcibk_config_reset_dev(dev);
+ xen_pcibk_config_free_dyn_fields(dev);
+
+ xen_unregister_device_domain_owner(dev);
+
+ spin_lock_irqsave(&found_psdev->lock, flags);
+ found_psdev->pdev = NULL;
+ spin_unlock_irqrestore(&found_psdev->lock, flags);
+
+ pcistub_device_put(found_psdev);
+ up_write(&pcistub_sem);
+}
+
+/*
+ * Return 1 if @dev, or any bridge above it, has the domain/bus/devfn
+ * recorded in @pdev_id; 0 otherwise.
+ */
+static int pcistub_match_one(struct pci_dev *dev,
+ struct pcistub_device_id *pdev_id)
+{
+ /* Match the specified device by domain, bus, slot, func and also if
+ * any of the device's parent bridges match.
+ */
+ for (; dev != NULL; dev = dev->bus->self) {
+ if (pci_domain_nr(dev->bus) == pdev_id->domain
+ && dev->bus->number == pdev_id->bus
+ && dev->devfn == pdev_id->devfn)
+ return 1;
+
+ /* Sometimes topmost bridge links to itself. */
+ if (dev == dev->bus->self)
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Check @dev (and its parent bridges) against every entry on
+ * pcistub_device_ids, under device_ids_lock.
+ *
+ * Returns 1 if any entry matches, 0 otherwise.
+ */
+static int pcistub_match(struct pci_dev *dev)
+{
+	struct pcistub_device_id *id;
+	unsigned long irq_flags;
+	int matched = 0;
+
+	spin_lock_irqsave(&device_ids_lock, irq_flags);
+
+	list_for_each_entry(id, &pcistub_device_ids, slot_list) {
+		if (!pcistub_match_one(dev, id))
+			continue;
+
+		matched = 1;
+		break;
+	}
+
+	spin_unlock_irqrestore(&device_ids_lock, irq_flags);
+
+	return matched;
+}
+
+/*
+ * Prepare a seized device for export: allocate its per-device data (with
+ * room for the IRQ name), set up the emulated config space, enable the
+ * device, ask the hypervisor to prepare MSI-X where applicable, save the
+ * initial config state, reset the function and finally disable it again.
+ *
+ * Returns 0 on success.  On failure (-ENOMEM, or the error from
+ * xen_pcibk_config_init_dev()/pci_enable_device()) the per-device data is
+ * freed and drvdata is cleared.
+ */
+static int pcistub_init_device(struct pci_dev *dev)
+{
+ struct xen_pcibk_dev_data *dev_data;
+ int err = 0;
+
+ dev_dbg(&dev->dev, "initializing...\n");
+
+ /* The PCI backend is not intended to be a module (or to work with
+ * removable PCI devices (yet). If it were, xen_pcibk_config_free()
+ * would need to be called somewhere to free the memory allocated
+ * here and then to call kfree(pci_get_drvdata(psdev->dev)).
+ */
+ /* Extra bytes hold the string formatted into irq_name below. */
+ dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]")
+ + strlen(pci_name(dev)) + 1, GFP_KERNEL);
+ if (!dev_data) {
+ err = -ENOMEM;
+ goto out;
+ }
+ pci_set_drvdata(dev, dev_data);
+
+ /*
+ * Setup name for fake IRQ handler. It will only be enabled
+ * once the device is turned on by the guest.
+ */
+ sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));
+
+ dev_dbg(&dev->dev, "initializing config\n");
+
+ /* NOTE(review): this re-initialises the global wait queue on every
+ * device init - confirm that is intended.
+ */
+ init_waitqueue_head(&xen_pcibk_aer_wait_queue);
+ err = xen_pcibk_config_init_dev(dev);
+ if (err)
+ goto out;
+
+ /* HACK: Force device (& ACPI) to determine what IRQ it's on - we
+ * must do this here because pcibios_enable_device may specify
+ * the pci device's true irq (and possibly its other resources)
+ * if they differ from what's in the configuration space.
+ * This makes the assumption that the device's resources won't
+ * change after this point (otherwise this code may break!)
+ */
+ dev_dbg(&dev->dev, "enabling device\n");
+ err = pci_enable_device(dev);
+ if (err)
+ goto config_release;
+
+ if (dev->msix_cap) {
+ struct physdev_pci_device ppdev = {
+ .seg = pci_domain_nr(dev->bus),
+ .bus = dev->bus->number,
+ .devfn = dev->devfn
+ };
+
+ /* A failure here is only logged; initialisation continues. */
+ err = HYPERVISOR_physdev_op(PHYSDEVOP_prepare_msix, &ppdev);
+ if (err && err != -ENOSYS)
+ dev_err(&dev->dev, "MSI-X preparation failed (%d)\n",
+ err);
+ }
+
+ /* We need the device active to save the state. */
+ dev_dbg(&dev->dev, "save state of device\n");
+ pci_save_state(dev);
+ dev_data->pci_saved_state = pci_store_saved_state(dev);
+ if (!dev_data->pci_saved_state)
+ dev_err(&dev->dev, "Could not store PCI conf saved state!\n");
+ else {
+ dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n");
+ __pci_reset_function_locked(dev);
+ pci_restore_state(dev);
+ }
+ /* Now disable the device (this also ensures some private device
+ * data is setup before we export)
+ */
+ dev_dbg(&dev->dev, "reset device\n");
+ xen_pcibk_reset_device(dev);
+
+ pci_set_dev_assigned(dev);
+ return 0;
+
+config_release:
+ xen_pcibk_config_free_dev(dev);
+
+out:
+ pci_set_drvdata(dev, NULL);
+ kfree(dev_data);
+ return err;
+}
+
+/*
+ * Because some initialization still happens on
+ * devices during fs_initcall, we need to defer
+ * full initialization of our devices until
+ * device_initcall.
+ *
+ * Drains the seized_devices list: each entry is initialised with
+ * pcistub_init_device() (with the list lock dropped) and, on success,
+ * moved to pcistub_devices.  Entries that fail to initialise are
+ * discarded.  Finally sets initialize_devices so that later seizures are
+ * initialised immediately.  Always returns 0.
+ */
+static int __init pcistub_init_devices_late(void)
+{
+	struct pcistub_device *psdev;
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	while (!list_empty(&seized_devices)) {
+		psdev = container_of(seized_devices.next,
+				     struct pcistub_device, dev_list);
+		list_del(&psdev->dev_list);
+
+		/* pcistub_init_device() must not run with irqs disabled. */
+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+		err = pcistub_init_device(psdev->dev);
+		if (err) {
+			dev_err(&psdev->dev->dev,
+				"error %d initializing device\n", err);
+			/*
+			 * Drop the reference taken by pci_dev_get() in
+			 * pcistub_device_alloc(); freeing psdev alone
+			 * would leak it.
+			 */
+			pci_dev_put(psdev->dev);
+			kfree(psdev);
+			psdev = NULL;
+		}
+
+		spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+		if (psdev)
+			list_add_tail(&psdev->dev_list, &pcistub_devices);
+	}
+
+	initialize_devices = 1;
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+	return 0;
+}
+
+/*
+ * Add the address domain/bus/devfn to pcistub_device_ids using the
+ * caller-allocated entry @new.  Takes ownership of @new: it is either
+ * linked into the list or, if the address is already present, freed.
+ */
+static void pcistub_device_id_add_list(struct pcistub_device_id *new,
+ int domain, int bus, unsigned int devfn)
+{
+ struct pcistub_device_id *pci_dev_id;
+ unsigned long flags;
+ int found = 0;
+
+ spin_lock_irqsave(&device_ids_lock, flags);
+
+ list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
+ if (pci_dev_id->domain == domain && pci_dev_id->bus == bus &&
+ pci_dev_id->devfn == devfn) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ new->domain = domain;
+ new->bus = bus;
+ new->devfn = devfn;
+ list_add_tail(&new->slot_list, &pcistub_device_ids);
+ }
+
+ spin_unlock_irqrestore(&device_ids_lock, flags);
+
+ /* Duplicate: free the unused entry outside the spinlock. */
+ if (found)
+ kfree(new);
+}
+
+/*
+ * Take ownership of @dev for the stub driver.  If late initialisation has
+ * already run (initialize_devices is set) the device is initialised now and
+ * put on pcistub_devices; otherwise it is queued on seized_devices.
+ *
+ * @pci_dev_id may be NULL.  When non-NULL, ownership passes to this
+ * function: it is added to the ID list on success and freed on failure.
+ *
+ * Returns 0 on success, -ENOMEM if the pcistub_device cannot be allocated,
+ * or the error from pcistub_init_device().
+ */
+static int pcistub_seize(struct pci_dev *dev,
+ struct pcistub_device_id *pci_dev_id)
+{
+ struct pcistub_device *psdev;
+ unsigned long flags;
+ int err = 0;
+
+ psdev = pcistub_device_alloc(dev);
+ if (!psdev) {
+ kfree(pci_dev_id);
+ return -ENOMEM;
+ }
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ if (initialize_devices) {
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+ /* don't want irqs disabled when calling pcistub_init_device */
+ err = pcistub_init_device(psdev->dev);
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ if (!err)
+ list_add(&psdev->dev_list, &pcistub_devices);
+ } else {
+ dev_dbg(&dev->dev, "deferring initialization\n");
+ list_add(&psdev->dev_list, &seized_devices);
+ }
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+ if (err) {
+ kfree(pci_dev_id);
+ /* Last reference: runs pcistub_device_release(). */
+ pcistub_device_put(psdev);
+ } else if (pci_dev_id)
+ pcistub_device_id_add_list(pci_dev_id, pci_domain_nr(dev->bus),
+ dev->bus->number, dev->devfn);
+
+ return err;
+}
+
+/* Called when 'bind'. This means we must _NOT_ call pci_reset_function or
+ * other functions that take the sysfs lock.
+ *
+ * A device is accepted when it is on the ID list (pcistub_match) or its
+ * driver_override names this driver.  In the latter case a new ID entry is
+ * allocated and handed to pcistub_seize().
+ *
+ * Returns 0 on success, -ENODEV if the device is not meant for us or has an
+ * unsupported header type, -ENOMEM on allocation failure, or the error from
+ * pcistub_seize().
+ */
+static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	struct pcistub_device_id *new_id = NULL;
+	int matched, overridden;
+
+	dev_dbg(&dev->dev, "probing...\n");
+
+	matched = pcistub_match(dev);
+	overridden = dev->driver_override &&
+		     !strcmp(dev->driver_override, PCISTUB_DRIVER_NAME);
+
+	/* Didn't find the device */
+	if (!overridden && !matched)
+		return -ENODEV;
+
+	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
+	    && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
+		dev_err(&dev->dev, "can't export pci devices that "
+			"don't have a normal (0) or bridge (1) "
+			"header type!\n");
+		return -ENODEV;
+	}
+
+	if (!matched) {
+		new_id = kmalloc(sizeof(*new_id), GFP_KERNEL);
+		if (!new_id)
+			return -ENOMEM;
+	}
+
+	dev_info(&dev->dev, "seizing device\n");
+	return pcistub_seize(dev, new_id);
+}
+
+/* Called when 'unbind'. This means we must _NOT_ call pci_reset_function or
+ * other functions that take the sysfs lock.
+ *
+ * Releases the device's quirk record, and if the device is on
+ * pcistub_devices: warns and detaches it from its backend when it is still
+ * in use, unlinks it from the list and drops the list's reference. */
+static void pcistub_remove(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev, *found_psdev = NULL;
+ unsigned long flags;
+
+ dev_dbg(&dev->dev, "removing\n");
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ xen_pcibk_config_quirk_release(dev);
+
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (psdev->dev == dev) {
+ found_psdev = psdev;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+ if (found_psdev) {
+ dev_dbg(&dev->dev, "found device to remove %s\n",
+ found_psdev->pdev ? "- in-use" : "");
+
+ if (found_psdev->pdev) {
+ int domid = xen_find_device_domain_owner(dev);
+
+ pr_warn("****** removing device %s while still in-use by domain %d! ******\n",
+ pci_name(found_psdev->dev), domid);
+ pr_warn("****** driver domain may still access this device's i/o resources!\n");
+ pr_warn("****** shutdown driver domain before binding device\n");
+ pr_warn("****** to other drivers or domains\n");
+
+ /* N.B. This ends up calling pcistub_put_pci_dev which ends up
+ * doing the FLR. */
+ xen_pcibk_release_pci_dev(found_psdev->pdev,
+ found_psdev->dev,
+ false /* caller holds the lock. */);
+ }
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+ list_del(&found_psdev->dev_list);
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+ /* the final put for releasing from the list */
+ pcistub_device_put(found_psdev);
+ }
+}
+
+/*
+ * Wildcard ID table: every ID field is PCI_ANY_ID.  pcistub_probe()
+ * decides which devices to actually seize.
+ */
+static const struct pci_device_id pcistub_ids[] = {
+ {
+ .vendor = PCI_ANY_ID,
+ .device = PCI_ANY_ID,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ },
+ {0,},
+};
+
+#define PCI_NODENAME_MAX 40
+/*
+ * Write "aerfail" to the "aerState" node under the backend xenstore path
+ * of the domain using @psdev.  The transaction is retried while
+ * xenbus_transaction_end() returns -EAGAIN; other errors are logged and
+ * the function gives up.
+ *
+ * Dereferences psdev->pdev->xdev, so callers must only pass a device that
+ * is currently assigned.
+ */
+static void kill_domain_by_device(struct pcistub_device *psdev)
+{
+ struct xenbus_transaction xbt;
+ int err;
+ char nodename[PCI_NODENAME_MAX];
+
+ BUG_ON(!psdev);
+ snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
+ psdev->pdev->xdev->otherend_id);
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ dev_err(&psdev->dev->dev,
+ "error %d when start xenbus transaction\n", err);
+ return;
+ }
+ /*PV AER handlers will set this flag*/
+ /* NOTE(review): the return value of xenbus_printf() is not checked. */
+ xenbus_printf(xbt, nodename, "aerState" , "aerfail");
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == -EAGAIN)
+ goto again;
+ dev_err(&psdev->dev->dev,
+ "error %d when end xenbus transaction\n", err);
+ return;
+ }
+}
+
+/* For each AER recovery step (error_detected, mmio_enabled, etc.) the
+ * frontend and backend need to cooperate. In xen_pcibk those steps all do a
+ * similar job: send a service request and wait for the frontend's response.
+ *
+ * @psdev: device the AER event refers to
+ * @state: value stored in aer_op->err for the frontend
+ * @aer_cmd: XEN_PCI_OP_aer_* command to send
+ * @result: value returned if the frontend does not respond in time
+ *
+ * Returns the frontend's answer (aer_op->err), @result on timeout, or
+ * PCI_ERS_RESULT_NONE if the frontend address of the device cannot be
+ * determined.
+*/
+static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ pci_channel_state_t state, int aer_cmd,
+ pci_ers_result_t result)
+{
+ pci_ers_result_t res = result;
+ struct xen_pcie_aer_op *aer_op;
+ struct xen_pcibk_device *pdev = psdev->pdev;
+ struct xen_pci_sharedinfo *sh_info = pdev->sh_info;
+ int ret;
+
+ /*with PV AER drivers*/
+ aer_op = &(sh_info->aer_op);
+ aer_op->cmd = aer_cmd ;
+ /*useful for error_detected callback*/
+ aer_op->err = state;
+ /*pcifront_end BDF*/
+ ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev,
+ &aer_op->domain, &aer_op->bus, &aer_op->devfn);
+ if (!ret) {
+ dev_err(&psdev->dev->dev,
+ DRV_NAME ": failed to get pcifront device\n");
+ return PCI_ERS_RESULT_NONE;
+ }
+ /* Order the aer_op writes above before the flag updates below. */
+ wmb();
+
+ dev_dbg(&psdev->dev->dev,
+ DRV_NAME ": aer_op %x dom %x bus %x devfn %x\n",
+ aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
+ /*local flag to mark there's aer request, xen_pcibk callback will use
+ * this flag to judge whether we need to check pci-front give aer
+ * service ack signal
+ */
+ set_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
+
+ /*It is possible that a pcifront conf_read_write ops request invokes
+ * the callback which cause the spurious execution of wake_up.
+ * Yet it is harmless and better than a spinlock here
+ */
+ set_bit(_XEN_PCIB_active,
+ (unsigned long *)&sh_info->flags);
+ wmb();
+ notify_remote_via_irq(pdev->evtchn_irq);
+
+ /* Enable IRQ to signal "request done". */
+ xen_pcibk_lateeoi(pdev, 0);
+
+ /* Wait up to 300 seconds for _XEN_PCIB_active to be cleared. */
+ ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
+ !(test_bit(_XEN_PCIB_active, (unsigned long *)
+ &sh_info->flags)), 300*HZ);
+
+ /* Enable IRQ for pcifront request if not already active. */
+ if (!test_bit(_PDEVF_op_active, &pdev->flags))
+ xen_pcibk_lateeoi(pdev, 0);
+
+ if (!ret) {
+ if (test_bit(_XEN_PCIB_active,
+ (unsigned long *)&sh_info->flags)) {
+ dev_err(&psdev->dev->dev,
+ "pcifront aer process not responding!\n");
+ clear_bit(_XEN_PCIB_active,
+ (unsigned long *)&sh_info->flags);
+ aer_op->err = PCI_ERS_RESULT_NONE;
+ /* NOTE(review): _PCIB_op_pending stays set on this
+ * path - confirm that is intended.
+ */
+ return res;
+ }
+ }
+ clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
+
+ res = (pci_ers_result_t)aer_op->err;
+ return res;
+}
+
+/*
+* xen_pcibk_slot_reset: sends the slot_reset request to pcifront, in case
+* the device driver there can provide this service, and then waits for the
+* pcifront ack.
+* @dev: pointer to PCI devices
+* return value is used by aer_core do_recovery policy
+*
+* Runs with pcistub_sem held for writing. Returns PCI_ERS_RESULT_RECOVERED
+* when the device is not found/assigned, not connected, or the guest has no
+* AER handler registered; otherwise the result of common_process().
+*/
+static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev;
+ pci_ers_result_t result;
+
+ result = PCI_ERS_RESULT_RECOVERED;
+ dev_dbg(&dev->dev, "xen_pcibk_slot_reset(bus:%x,devfn:%x)\n",
+ dev->bus->number, dev->devfn);
+
+ down_write(&pcistub_sem);
+ /* Takes a reference on psdev; dropped at the end label. */
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+ dev->bus->number,
+ PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+
+ if (!psdev || !psdev->pdev) {
+ dev_err(&dev->dev,
+ DRV_NAME " device is not found/assigned\n");
+ goto end;
+ }
+
+ if (!psdev->pdev->sh_info) {
+ dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+ " by HVM, kill it\n");
+ kill_domain_by_device(psdev);
+ goto end;
+ }
+
+ if (!test_bit(_XEN_PCIB_AERHANDLER,
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ dev_err(&dev->dev,
+ "guest with no AER driver should have been killed\n");
+ goto end;
+ }
+ result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
+
+ if (result == PCI_ERS_RESULT_NONE ||
+ result == PCI_ERS_RESULT_DISCONNECT) {
+ dev_dbg(&dev->dev,
+ "No AER slot_reset service or disconnected!\n");
+ kill_domain_by_device(psdev);
+ }
+end:
+ if (psdev)
+ pcistub_device_put(psdev);
+ up_write(&pcistub_sem);
+ return result;
+
+}
+
+
+/* xen_pcibk_mmio_enabled: sends the mmio_enabled request to pcifront, in
+* case the device driver there can provide this service, and then waits
+* for the pcifront ack.
+* @dev: pointer to PCI devices
+* return value is used by aer_core do_recovery policy
+*
+* Runs with pcistub_sem held for writing. Returns PCI_ERS_RESULT_RECOVERED
+* when the device is not found/assigned, not connected, or the guest has no
+* AER handler registered; otherwise the result of common_process().
+*/
+
+static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev;
+ pci_ers_result_t result;
+
+ result = PCI_ERS_RESULT_RECOVERED;
+ dev_dbg(&dev->dev, "xen_pcibk_mmio_enabled(bus:%x,devfn:%x)\n",
+ dev->bus->number, dev->devfn);
+
+ down_write(&pcistub_sem);
+ /* Takes a reference on psdev; dropped at the end label. */
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+ dev->bus->number,
+ PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+
+ if (!psdev || !psdev->pdev) {
+ dev_err(&dev->dev,
+ DRV_NAME " device is not found/assigned\n");
+ goto end;
+ }
+
+ if (!psdev->pdev->sh_info) {
+ dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+ " by HVM, kill it\n");
+ kill_domain_by_device(psdev);
+ goto end;
+ }
+
+ if (!test_bit(_XEN_PCIB_AERHANDLER,
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ dev_err(&dev->dev,
+ "guest with no AER driver should have been killed\n");
+ goto end;
+ }
+ result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
+
+ if (result == PCI_ERS_RESULT_NONE ||
+ result == PCI_ERS_RESULT_DISCONNECT) {
+ dev_dbg(&dev->dev,
+ "No AER mmio_enabled service or disconnected!\n");
+ kill_domain_by_device(psdev);
+ }
+end:
+ if (psdev)
+ pcistub_device_put(psdev);
+ up_write(&pcistub_sem);
+ return result;
+}
+
+/*
+ * xen_pcibk_error_detected: send the error_detected request to pcifront, in
+ * case the device driver can provide this service, and then wait for the
+ * pcifront ack.
+ * @dev: pointer to the PCI device
+ * @error: the current PCI connection state
+ *
+ * The return value is used by the aer_core do_recovery policy.
+ */
+static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
+	pci_channel_state_t error)
+{
+	/* Returned unchanged whenever the request is not forwarded. */
+	pci_ers_result_t result = PCI_ERS_RESULT_CAN_RECOVER;
+	struct pcistub_device *psdev;
+
+	dev_dbg(&dev->dev, "xen_pcibk_error_detected(bus:%x,devfn:%x)\n",
+		dev->bus->number, dev->devfn);
+
+	down_write(&pcistub_sem);
+	psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
+				    PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+
+	if (!psdev || !psdev->pdev) {
+		dev_err(&dev->dev, DRV_NAME " device is not found/assigned\n");
+	} else if (!psdev->pdev->sh_info) {
+		/* No shared info page to talk through: kill the owner. */
+		dev_err(&dev->dev, DRV_NAME
+			" device is not connected or owned by HVM, kill it\n");
+		kill_domain_by_device(psdev);
+	} else if (!test_bit(_XEN_PCIB_AERHANDLER,
+			     (unsigned long *)&psdev->pdev->sh_info->flags)) {
+		/* Guest owns the device yet registered no AER handler. */
+		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
+		kill_domain_by_device(psdev);
+	} else {
+		/* Forward the error_detected request and wait for the answer. */
+		result = common_process(psdev, error, XEN_PCI_OP_aer_detected,
+					result);
+		switch (result) {
+		case PCI_ERS_RESULT_NONE:
+		case PCI_ERS_RESULT_DISCONNECT:
+			dev_dbg(&dev->dev,
+				"No AER error_detected service or disconnected!\n");
+			kill_domain_by_device(psdev);
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Drop the reference taken by pcistub_device_find(), if any. */
+	if (psdev)
+		pcistub_device_put(psdev);
+	up_write(&pcistub_sem);
+	return result;
+}
+
+/*
+ * xen_pcibk_error_resume: send the error_resume request to pcifront, in case
+ * the device driver can provide this service, and then wait for the pcifront
+ * ack.
+ * @dev: pointer to the PCI device
+ */
+static void xen_pcibk_error_resume(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+
+	dev_dbg(&dev->dev, "xen_pcibk_error_resume(bus:%x,devfn:%x)\n",
+		dev->bus->number, dev->devfn);
+
+	down_write(&pcistub_sem);
+	psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
+				    PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+
+	if (!psdev || !psdev->pdev) {
+		dev_err(&dev->dev, DRV_NAME " device is not found/assigned\n");
+	} else if (!psdev->pdev->sh_info) {
+		/* No shared info page to talk through: kill the owner. */
+		dev_err(&dev->dev, DRV_NAME
+			" device is not connected or owned by HVM, kill it\n");
+		kill_domain_by_device(psdev);
+	} else if (!test_bit(_XEN_PCIB_AERHANDLER,
+			     (unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_err(&dev->dev,
+			"guest with no AER driver should have been killed\n");
+		kill_domain_by_device(psdev);
+	} else {
+		/* The result of the resume request is not used. */
+		common_process(psdev, 1, XEN_PCI_OP_aer_resume,
+			       PCI_ERS_RESULT_RECOVERED);
+	}
+
+	/* Drop the reference taken by pcistub_device_find(), if any. */
+	if (psdev)
+		pcistub_device_put(psdev);
+	up_write(&pcistub_sem);
+}
+
+/*
+ * AER error-recovery callbacks for xen-pciback.  This table is installed as
+ * the .err_handler of xen_pcibk_pci_driver; each callback forwards the
+ * corresponding request to pcifront.
+ */
+static const struct pci_error_handlers xen_pcibk_error_handler = {
+ .error_detected = xen_pcibk_error_detected,
+ .mmio_enabled = xen_pcibk_mmio_enabled,
+ .slot_reset = xen_pcibk_slot_reset,
+ .resume = xen_pcibk_error_resume,
+};
+
+/*
+ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
+ * for a normal device. I don't want it to be loaded automatically.
+ */
+
+/* The PCI driver object registered by pcistub_init(). */
+static struct pci_driver xen_pcibk_pci_driver = {
+ /* The name should be xen_pciback, but until the tools are updated
+ * we will keep it as pciback. */
+ .name = PCISTUB_DRIVER_NAME,
+ .id_table = pcistub_ids,
+ .probe = pcistub_probe,
+ .remove = pcistub_remove,
+ /* AER recovery callbacks that relay requests to pcifront */
+ .err_handler = &xen_pcibk_error_handler,
+};
+
+/*
+ * str_to_slot: parse a PCI slot specification from @buf.
+ *
+ * Accepted forms (all numbers hexadecimal, surrounding whitespace allowed):
+ *   domain:bus:slot.func   domain:bus:slot.*   domain:bus:*.*
+ *   bus:slot.func          bus:slot.*          bus:*.*
+ * A '*' wildcard is reported by storing -1 in *slot and/or *func.  When the
+ * domain is omitted, *domain is set to 0.
+ *
+ * Returns 0 when one of the forms matches the whole string, -EINVAL
+ * otherwise.  The output parameters may have been modified even on failure.
+ */
+static inline int str_to_slot(const char *buf, int *domain, int *bus,
+ int *slot, int *func)
+{
+ int parsed = 0;
+
+ /*
+ * The trailing %n is only stored when the complete pattern matched, so
+ * a non-zero 'parsed' is the sign of a full match.  A short match count
+ * (3 or 2) triggers a retry with the corresponding wildcard pattern.
+ */
+ switch (sscanf(buf, " %x:%x:%x.%x %n", domain, bus, slot, func,
+ &parsed)) {
+ case 3:
+ *func = -1;
+ sscanf(buf, " %x:%x:%x.* %n", domain, bus, slot, &parsed);
+ break;
+ case 2:
+ *slot = *func = -1;
+ sscanf(buf, " %x:%x:*.* %n", domain, bus, &parsed);
+ break;
+ }
+ /* Success only if the pattern consumed the string up to its NUL. */
+ if (parsed && !buf[parsed])
+ return 0;
+
+ /* try again without domain */
+ /*
+ * 'parsed' is deliberately not reset here: a stale non-zero value can
+ * only come from an attempt that stopped short of the NUL, so the
+ * check below still fails for it.
+ */
+ *domain = 0;
+ switch (sscanf(buf, " %x:%x.%x %n", bus, slot, func, &parsed)) {
+ case 2:
+ *func = -1;
+ sscanf(buf, " %x:%x.* %n", bus, slot, &parsed);
+ break;
+ case 1:
+ *slot = *func = -1;
+ sscanf(buf, " %x:*.* %n", bus, &parsed);
+ break;
+ }
+ if (parsed && !buf[parsed])
+ return 0;
+
+ return -EINVAL;
+}
+
+/*
+ * str_to_quirk: parse a quirk specification from @buf.
+ *
+ * Accepted forms (all numbers hexadecimal, surrounding whitespace allowed):
+ *   domain:bus:slot.func-reg:size:mask
+ *   bus:slot.func-reg:size:mask        (*domain is set to 0)
+ *
+ * Returns 0 when one of the forms matches the whole string, -EINVAL
+ * otherwise.  The output parameters may have been modified even on failure.
+ */
+static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
+ *slot, int *func, int *reg, int *size, int *mask)
+{
+ int parsed = 0;
+
+ /* %n is only stored when every preceding field matched. */
+ sscanf(buf, " %x:%x:%x.%x-%x:%x:%x %n", domain, bus, slot, func,
+ reg, size, mask, &parsed);
+ if (parsed && !buf[parsed])
+ return 0;
+
+ /* try again without domain */
+ *domain = 0;
+ sscanf(buf, " %x:%x.%x-%x:%x:%x %n", bus, slot, func, reg, size,
+ mask, &parsed);
+ if (parsed && !buf[parsed])
+ return 0;
+
+ return -EINVAL;
+}
+
+/*
+ * pcistub_device_id_add: add domain:bus:slot.func to the list of device IDs
+ * to seize.
+ *
+ * A negative @slot expands to slots 0..31 and a negative @func to functions
+ * 0..7; the expansion is done by recursing once per concrete value and stops
+ * at the first error.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range ID, -ENOMEM when the
+ * list entry cannot be allocated.
+ */
+static int pcistub_device_id_add(int domain, int bus, int slot, int func)
+{
+ struct pcistub_device_id *pci_dev_id;
+ /*
+ * devfn is computed up front, possibly from wildcard (-1) values, but it
+ * is only used below once slot and func are both concrete.
+ */
+ int rc = 0, devfn = PCI_DEVFN(slot, func);
+
+ if (slot < 0) {
+ for (slot = 0; !rc && slot < 32; ++slot)
+ rc = pcistub_device_id_add(domain, bus, slot, func);
+ return rc;
+ }
+
+ if (func < 0) {
+ for (func = 0; !rc && func < 8; ++func)
+ rc = pcistub_device_id_add(domain, bus, slot, func);
+ return rc;
+ }
+
+ /*
+ * Range validation.  When the preprocessor block below is compiled in
+ * and pci_domains_supported is false, any non-zero domain is rejected;
+ * otherwise the domain must lie in 0..0xffff.  The PCI_SLOT()/PCI_FUNC()
+ * comparisons reject slot/func values that do not survive the
+ * PCI_DEVFN() round trip.
+ */
+ if ((
+#if !defined(MODULE) /* pci_domains_supported is not being exported */ \
+ || !defined(CONFIG_PCI_DOMAINS)
+ !pci_domains_supported ? domain :
+#endif
+ domain < 0 || domain > 0xffff)
+ || bus < 0 || bus > 0xff
+ || PCI_SLOT(devfn) != slot
+ || PCI_FUNC(devfn) != func)
+ return -EINVAL;
+
+ pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
+ if (!pci_dev_id)
+ return -ENOMEM;
+
+ pr_debug("wants to seize %04x:%02x:%02x.%d\n",
+ domain, bus, slot, func);
+
+ /* NOTE(review): the helper presumably takes ownership of pci_dev_id - confirm */
+ pcistub_device_id_add_list(pci_dev_id, domain, bus, devfn);
+
+ return 0;
+}
+
+/*
+ * pcistub_device_id_remove: drop every entry matching domain:bus:slot.func
+ * from the list of device IDs to seize.
+ *
+ * A negative @slot or @func acts as a wildcard for that field.
+ *
+ * Returns 0 if at least one entry was removed, -ENOENT otherwise.
+ */
+static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
+{
+	struct pcistub_device_id *pci_dev_id, *t;
+	int err = -ENOENT;
+	unsigned long flags;
+
+	spin_lock_irqsave(&device_ids_lock, flags);
+	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
+				 slot_list) {
+		if (pci_dev_id->domain != domain || pci_dev_id->bus != bus)
+			continue;
+		if (slot >= 0 && PCI_SLOT(pci_dev_id->devfn) != slot)
+			continue;
+		if (func >= 0 && PCI_FUNC(pci_dev_id->devfn) != func)
+			continue;
+
+		/*
+		 * Log the entry actually being removed rather than the
+		 * request, whose slot/func may be the -1 wildcard.  This
+		 * must happen before the entry is freed.
+		 */
+		pr_debug("removed %04x:%02x:%02x.%d from seize list\n",
+			 domain, bus, PCI_SLOT(pci_dev_id->devfn),
+			 PCI_FUNC(pci_dev_id->devfn));
+
+		/* Don't break; here because it's possible the same
+		 * slot could be in the list more than once
+		 */
+		list_del(&pci_dev_id->slot_list);
+		kfree(pci_dev_id);
+
+		err = 0;
+	}
+	spin_unlock_irqrestore(&device_ids_lock, flags);
+
+	return err;
+}
+
+/*
+ * pcistub_reg_add: attach a quirk config field (@reg, @size, @mask) to the
+ * device at domain:bus:slot.func.
+ *
+ * Returns 0 on success, -EINVAL for a register offset above 0xfff or a mask
+ * wider than @size bytes, -ENODEV if the device is not found, -ENOMEM on
+ * allocation failure, or the error from xen_pcibk_config_quirks_add_field().
+ */
+static int pcistub_reg_add(int domain, int bus, int slot, int func,
+			   unsigned int reg, unsigned int size,
+			   unsigned int mask)
+{
+	struct pcistub_device *psdev;
+	struct config_field *field;
+	struct pci_dev *dev;
+	int err;
+
+	if (reg > 0xfff)
+		return -EINVAL;
+	/* For fields narrower than 4 bytes the mask must fit in the field. */
+	if (size < 4 && (mask >> (size * 8)))
+		return -EINVAL;
+
+	psdev = pcistub_device_find(domain, bus, slot, func);
+	if (!psdev)
+		return -ENODEV;
+	dev = psdev->dev;
+
+	/* kzalloc() leaves the init, reset and release hooks NULL. */
+	field = kzalloc(sizeof(*field), GFP_KERNEL);
+	if (!field) {
+		err = -ENOMEM;
+		goto put_dev;
+	}
+
+	field->offset = reg;
+	field->size = size;
+	field->mask = mask;
+	field->clean = xen_pcibk_config_field_free;
+
+	err = xen_pcibk_config_quirks_add_field(dev, field);
+	if (err)
+		kfree(field);
+
+put_dev:
+	pcistub_device_put(psdev);
+	return err;
+}
+
+/*
+ * sysfs "new_slot" (write-only): parse a slot specification from @buf and
+ * add it to the list of device IDs to seize.  Returns @count on success or
+ * a negative errno.
+ */
+static ssize_t new_slot_store(struct device_driver *drv, const char *buf,
+			      size_t count)
+{
+	int domain, bus, slot, func;
+	int err = str_to_slot(buf, &domain, &bus, &slot, &func);
+
+	if (!err)
+		err = pcistub_device_id_add(domain, bus, slot, func);
+	if (!err)
+		err = count;
+
+	return err;
+}
+static DRIVER_ATTR_WO(new_slot);
+
+/*
+ * sysfs "remove_slot" (write-only): parse a slot specification from @buf and
+ * remove the matching entries from the list of device IDs to seize.  Returns
+ * @count on success or a negative errno.
+ */
+static ssize_t remove_slot_store(struct device_driver *drv, const char *buf,
+				 size_t count)
+{
+	int domain, bus, slot, func;
+	int err = str_to_slot(buf, &domain, &bus, &slot, &func);
+
+	if (!err)
+		err = pcistub_device_id_remove(domain, bus, slot, func);
+	if (!err)
+		err = count;
+
+	return err;
+}
+static DRIVER_ATTR_WO(remove_slot);
+
+/*
+ * sysfs "slots" (read-only): print one "dddd:bb:ss.f" line per entry of the
+ * device ID list, stopping once the page-sized buffer is full.
+ */
+static ssize_t slots_show(struct device_driver *drv, char *buf)
+{
+	struct pcistub_device_id *id;
+	unsigned long irqflags;
+	size_t len = 0;
+
+	spin_lock_irqsave(&device_ids_lock, irqflags);
+	list_for_each_entry(id, &pcistub_device_ids, slot_list) {
+		if (len >= PAGE_SIZE)
+			break;
+
+		len += scnprintf(buf + len, PAGE_SIZE - len,
+				 "%04x:%02x:%02x.%d\n",
+				 id->domain, id->bus,
+				 PCI_SLOT(id->devfn), PCI_FUNC(id->devfn));
+	}
+	spin_unlock_irqrestore(&device_ids_lock, irqflags);
+
+	return len;
+}
+static DRIVER_ATTR_RO(slots);
+
+/*
+ * sysfs "irq_handlers" (read-only): for every stub device that has driver
+ * data, print "<pci name>:<on|off>:<ack|not ack>ing:<handled count>".
+ */
+static ssize_t irq_handlers_show(struct device_driver *drv, char *buf)
+{
+	struct xen_pcibk_dev_data *data;
+	struct pcistub_device *entry;
+	unsigned long irqflags;
+	size_t len = 0;
+
+	spin_lock_irqsave(&pcistub_devices_lock, irqflags);
+	list_for_each_entry(entry, &pcistub_devices, dev_list) {
+		if (len >= PAGE_SIZE)
+			break;
+
+		/* Skip entries without a PCI device or without driver data. */
+		data = entry->dev ? pci_get_drvdata(entry->dev) : NULL;
+		if (!data)
+			continue;
+
+		len += scnprintf(buf + len, PAGE_SIZE - len,
+				 "%s:%s:%sing:%ld\n",
+				 pci_name(entry->dev),
+				 data->isr_on ? "on" : "off",
+				 data->ack_intr ? "ack" : "not ack",
+				 data->handled);
+	}
+	spin_unlock_irqrestore(&pcistub_devices_lock, irqflags);
+
+	return len;
+}
+static DRIVER_ATTR_RO(irq_handlers);
+
+/*
+ * sysfs "irq_handler_state" (write-only): toggle the isr_on flag of the
+ * device named in @buf; when the flag becomes set, ack_intr is set as well.
+ * Returns @count on success, -ENOENT if the device or its driver data is
+ * missing, or the parse error from str_to_slot().
+ */
+static ssize_t irq_handler_state_store(struct device_driver *drv,
+				       const char *buf, size_t count)
+{
+	struct xen_pcibk_dev_data *dev_data;
+	struct pcistub_device *psdev;
+	int domain, bus, slot, func;
+	int turn_on;
+	int err;
+
+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
+	if (err)
+		return err;
+
+	psdev = pcistub_device_find(domain, bus, slot, func);
+	if (!psdev)
+		return -ENOENT;
+
+	dev_data = pci_get_drvdata(psdev->dev);
+	if (!dev_data) {
+		err = -ENOENT;
+	} else {
+		turn_on = !dev_data->isr_on;
+
+		dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
+			dev_data->irq_name, dev_data->isr_on, turn_on);
+
+		dev_data->isr_on = turn_on;
+		if (turn_on)
+			dev_data->ack_intr = 1;
+
+		err = count;
+	}
+
+	pcistub_device_put(psdev);
+	return err;
+}
+static DRIVER_ATTR_WO(irq_handler_state);
+
+/*
+ * sysfs "quirks" write side: parse a quirk specification from @buf and add
+ * the described config field to the device.  Returns @count on success or a
+ * negative errno.
+ */
+static ssize_t quirks_store(struct device_driver *drv, const char *buf,
+			    size_t count)
+{
+	int domain, bus, slot, func, reg, size, mask;
+	int err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
+			       &mask);
+
+	if (!err)
+		err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
+	if (!err)
+		err = count;
+
+	return err;
+}
+
+/*
+ * sysfs "quirks" read side: for every quirk, print the device's
+ * bus:slot.func and its vendor:device:subvendor:subdevice IDs, followed by
+ * one "offset:size:mask" line per config field of that device.  Output stops
+ * once the page-sized buffer is full.
+ */
+static ssize_t quirks_show(struct device_driver *drv, char *buf)
+{
+	const struct config_field_entry *entry;
+	const struct config_field *field;
+	struct xen_pcibk_config_quirk *quirk;
+	struct xen_pcibk_dev_data *dev_data;
+	unsigned long irqflags;
+	int len = 0;
+
+	spin_lock_irqsave(&device_ids_lock, irqflags);
+	list_for_each_entry(quirk, &xen_pcibk_quirks, quirks_list) {
+		if (len >= PAGE_SIZE)
+			goto unlock;
+
+		len += scnprintf(buf + len, PAGE_SIZE - len,
+				 "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
+				 quirk->pdev->bus->number,
+				 PCI_SLOT(quirk->pdev->devfn),
+				 PCI_FUNC(quirk->pdev->devfn),
+				 quirk->devid.vendor, quirk->devid.device,
+				 quirk->devid.subvendor,
+				 quirk->devid.subdevice);
+
+		dev_data = pci_get_drvdata(quirk->pdev);
+
+		list_for_each_entry(entry, &dev_data->config_fields, list) {
+			field = entry->field;
+			if (len >= PAGE_SIZE)
+				goto unlock;
+
+			len += scnprintf(buf + len, PAGE_SIZE - len,
+					 "\t\t%08x:%01x:%08x\n",
+					 entry->base_offset + field->offset,
+					 field->size, field->mask);
+		}
+	}
+
+unlock:
+	spin_unlock_irqrestore(&device_ids_lock, irqflags);
+
+	return len;
+}
+static DRIVER_ATTR_RW(quirks);
+
+/*
+ * sysfs "permissive" write side: set the permissive flag on the device named
+ * in @buf, warning the first time the flag is set.  Returns @count on
+ * success, -ENODEV if the device is not found, -ENXIO if it has no driver
+ * data, or the parse error from str_to_slot().
+ */
+static ssize_t permissive_store(struct device_driver *drv, const char *buf,
+				size_t count)
+{
+	struct xen_pcibk_dev_data *dev_data;
+	struct pcistub_device *psdev;
+	int domain, bus, slot, func;
+	int err;
+
+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
+	if (err)
+		return err;
+
+	psdev = pcistub_device_find(domain, bus, slot, func);
+	if (!psdev)
+		return -ENODEV;
+
+	dev_data = pci_get_drvdata(psdev->dev);
+	if (!dev_data) {
+		/* the driver data for a device should never be null here */
+		err = -ENXIO;
+	} else {
+		if (!dev_data->permissive) {
+			dev_data->permissive = 1;
+			/* Let user know that what they're doing could be unsafe */
+			dev_warn(&psdev->dev->dev,
+				 "enabling permissive mode configuration space accesses!\n");
+			dev_warn(&psdev->dev->dev,
+				 "permissive mode is potentially unsafe!\n");
+		}
+		err = count;
+	}
+
+	pcistub_device_put(psdev);
+	return err;
+}
+
+/*
+ * sysfs "permissive" read side: print the PCI name of every stub device
+ * whose driver data has the permissive flag set, one per line.
+ */
+static ssize_t permissive_show(struct device_driver *drv, char *buf)
+{
+	struct xen_pcibk_dev_data *data;
+	struct pcistub_device *entry;
+	unsigned long irqflags;
+	size_t len = 0;
+
+	spin_lock_irqsave(&pcistub_devices_lock, irqflags);
+	list_for_each_entry(entry, &pcistub_devices, dev_list) {
+		if (len >= PAGE_SIZE)
+			break;
+
+		/* Skip entries without a PCI device or without driver data. */
+		data = entry->dev ? pci_get_drvdata(entry->dev) : NULL;
+		if (!data || !data->permissive)
+			continue;
+
+		len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n",
+				 pci_name(entry->dev));
+	}
+	spin_unlock_irqrestore(&pcistub_devices_lock, irqflags);
+
+	return len;
+}
+static DRIVER_ATTR_RW(permissive);
+
+/*
+ * Remove the driver's sysfs attribute files and unregister the PCI driver.
+ * Also used by pcistub_init() and xen_pcibk_init() to unwind on failure.
+ */
+static void pcistub_exit(void)
+{
+	struct device_driver *drv = &xen_pcibk_pci_driver.driver;
+
+	driver_remove_file(drv, &driver_attr_new_slot);
+	driver_remove_file(drv, &driver_attr_remove_slot);
+	driver_remove_file(drv, &driver_attr_slots);
+	driver_remove_file(drv, &driver_attr_quirks);
+	driver_remove_file(drv, &driver_attr_permissive);
+	driver_remove_file(drv, &driver_attr_irq_handlers);
+	driver_remove_file(drv, &driver_attr_irq_handler_state);
+
+	pci_unregister_driver(&xen_pcibk_pci_driver);
+}
+
+/*
+ * pcistub_init: parse the pci_devs_to_hide string, register the PCI driver
+ * and create its sysfs attribute files.
+ *
+ * pci_devs_to_hide is a sequence of parenthesised slot specifications, e.g.
+ * "(dddd:bb:ss.f)" or "(bb:ss.f)"; '*' may replace the function, or both the
+ * slot and the function.  Each parsed entry is added with
+ * pcistub_device_id_add().
+ *
+ * Returns 0 on success, -EINVAL on a parse error, or the error from
+ * pcistub_device_id_add(), pci_register_driver() or driver_create_file().
+ */
+static int __init pcistub_init(void)
+{
+ int pos = 0;
+ int err = 0;
+ int domain, bus, slot, func;
+ int parsed;
+
+ if (pci_devs_to_hide && *pci_devs_to_hide) {
+ do {
+ /* %n is only stored on a complete match, so 0 means "no match yet" */
+ parsed = 0;
+
+ /* First try the forms that include a domain. */
+ err = sscanf(pci_devs_to_hide + pos,
+ " (%x:%x:%x.%x) %n",
+ &domain, &bus, &slot, &func, &parsed);
+ switch (err) {
+ case 3:
+ func = -1;
+ sscanf(pci_devs_to_hide + pos,
+ " (%x:%x:%x.*) %n",
+ &domain, &bus, &slot, &parsed);
+ break;
+ case 2:
+ slot = func = -1;
+ sscanf(pci_devs_to_hide + pos,
+ " (%x:%x:*.*) %n",
+ &domain, &bus, &parsed);
+ break;
+ }
+
+ /* Fall back to the forms without a domain (domain 0). */
+ if (!parsed) {
+ domain = 0;
+ err = sscanf(pci_devs_to_hide + pos,
+ " (%x:%x.%x) %n",
+ &bus, &slot, &func, &parsed);
+ switch (err) {
+ case 2:
+ func = -1;
+ sscanf(pci_devs_to_hide + pos,
+ " (%x:%x.*) %n",
+ &bus, &slot, &parsed);
+ break;
+ case 1:
+ slot = func = -1;
+ sscanf(pci_devs_to_hide + pos,
+ " (%x:*.*) %n",
+ &bus, &parsed);
+ break;
+ }
+ }
+
+ if (parsed <= 0)
+ goto parse_error;
+
+ err = pcistub_device_id_add(domain, bus, slot, func);
+ if (err)
+ goto out;
+
+ /* Advance past the entry that was just consumed. */
+ pos += parsed;
+ } while (pci_devs_to_hide[pos]);
+ }
+
+ /* If we're the first PCI Device Driver to register, we're the
+ * first one to get offered PCI devices as they become
+ * available (and thus we can be the first to grab them)
+ */
+ err = pci_register_driver(&xen_pcibk_pci_driver);
+ if (err < 0)
+ goto out;
+
+ /* Create the sysfs files one by one, stopping at the first failure. */
+ err = driver_create_file(&xen_pcibk_pci_driver.driver,
+ &driver_attr_new_slot);
+ if (!err)
+ err = driver_create_file(&xen_pcibk_pci_driver.driver,
+ &driver_attr_remove_slot);
+ if (!err)
+ err = driver_create_file(&xen_pcibk_pci_driver.driver,
+ &driver_attr_slots);
+ if (!err)
+ err = driver_create_file(&xen_pcibk_pci_driver.driver,
+ &driver_attr_quirks);
+ if (!err)
+ err = driver_create_file(&xen_pcibk_pci_driver.driver,
+ &driver_attr_permissive);
+
+ if (!err)
+ err = driver_create_file(&xen_pcibk_pci_driver.driver,
+ &driver_attr_irq_handlers);
+ if (!err)
+ err = driver_create_file(&xen_pcibk_pci_driver.driver,
+ &driver_attr_irq_handler_state);
+ /* On failure, remove the files and unregister the driver again. */
+ if (err)
+ pcistub_exit();
+
+out:
+ return err;
+
+parse_error:
+ pr_err("Error parsing pci_devs_to_hide at \"%s\"\n",
+ pci_devs_to_hide + pos);
+ return -EINVAL;
+}
+
+#ifndef MODULE
+/*
+ * fs_initcall happens before device_initcall
+ * so xen_pcibk *should* get called first (b/c we
+ * want to suck up any device before other drivers
+ * get a chance by being the first pci device
+ * driver to register)
+ */
+fs_initcall(pcistub_init);
+#endif
+
+#ifdef CONFIG_PCI_IOV
+/*
+ * Find a stub device that is not assigned to a backend (no ->pdev), is not
+ * @pdev itself, and whose physical function is @pdev.  Returns the first
+ * such entry or NULL.  No reference is taken on the returned entry.
+ */
+static struct pcistub_device *find_vfs(const struct pci_dev *pdev)
+{
+	struct pcistub_device *cur;
+	struct pcistub_device *match = NULL;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&pcistub_devices_lock, irqflags);
+	list_for_each_entry(cur, &pcistub_devices, dev_list) {
+		if (cur->pdev || cur->dev == pdev)
+			continue;
+		if (pci_physfn(cur->dev) != pdev)
+			continue;
+
+		match = cur;
+		break;
+	}
+	spin_unlock_irqrestore(&pcistub_devices_lock, irqflags);
+
+	return match;
+}
+
+/*
+ * Bus notifier callback: when a physical function is about to be unbound
+ * from its driver, release the driver from every virtual function that
+ * find_vfs() reports for it.  Always returns NOTIFY_DONE.
+ */
+static int pci_stub_notifier(struct notifier_block *nb,
+			     unsigned long action, void *data)
+{
+	struct device *dev = data;
+	const struct pci_dev *pdev = to_pci_dev(dev);
+	struct pcistub_device *vf;
+
+	if (action != BUS_NOTIFY_UNBIND_DRIVER || !pdev->is_physfn)
+		return NOTIFY_DONE;
+
+	/* Repeat until find_vfs() no longer reports a matching VF. */
+	while ((vf = find_vfs(pdev)) != NULL)
+		device_release_driver(&vf->dev->dev);
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Notifier block for pci_stub_notifier(); xen_pcibk_init() registers it on
+ * pci_bus_type and xen_pcibk_cleanup() unregisters it.
+ */
+static struct notifier_block pci_stub_nb = {
+ .notifier_call = pci_stub_notifier,
+};
+#endif
+
+/*
+ * Module entry point.  Only runs in the initial domain: initialises the
+ * config-space code, registers the stub driver (when built as a module),
+ * performs the late device initialisation and registers with xenbus.  With
+ * CONFIG_PCI_IOV the PCI bus notifier is registered once xenbus
+ * registration has succeeded.
+ */
+static int __init xen_pcibk_init(void)
+{
+	int err;
+
+	if (!xen_initial_domain())
+		return -ENODEV;
+
+	err = xen_pcibk_config_init();
+	if (err)
+		return err;
+
+#ifdef MODULE
+	err = pcistub_init();
+	if (err < 0)
+		return err;
+#endif
+
+	pcistub_init_devices_late();
+
+	err = xen_pcibk_xenbus_register();
+	if (err) {
+		pcistub_exit();
+		return err;
+	}
+
+#ifdef CONFIG_PCI_IOV
+	bus_register_notifier(&pci_bus_type, &pci_stub_nb);
+#endif
+
+	return err;
+}
+
+/*
+ * Module exit: undo xen_pcibk_init() in reverse order - bus notifier (with
+ * CONFIG_PCI_IOV), xenbus registration, then the stub driver and its sysfs
+ * files.
+ */
+static void __exit xen_pcibk_cleanup(void)
+{
+#ifdef CONFIG_PCI_IOV
+ bus_unregister_notifier(&pci_bus_type, &pci_stub_nb);
+#endif
+ xen_pcibk_xenbus_unregister();
+ pcistub_exit();
+}
+
+/* Module entry/exit registration and metadata */
+module_init(xen_pcibk_init);
+module_exit(xen_pcibk_cleanup);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("xen-backend:pci");
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
new file mode 100644
index 000000000..235cdfe13
--- /dev/null
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PCI Backend Common Data Structures & Function Declarations
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#ifndef __XEN_PCIBACK_H__
+#define __XEN_PCIBACK_H__
+
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <xen/xenbus.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/atomic.h>
+#include <xen/events.h>
+#include <xen/interface/io/pciif.h>
+
+#define DRV_NAME "xen-pciback"
+
+/* List node that carries a pointer to a PCI device. */
+struct pci_dev_entry {
+ struct list_head list; /* linkage into the owning list */
+ struct pci_dev *dev; /* the PCI device this entry refers to */
+};
+
+/*
+ * Flag bits: each underscore-prefixed macro is a bit number and the matching
+ * macro without the underscore is the corresponding mask.  _EOI_pending is
+ * used on xen_pcibk_device.flags by xen_pcibk_lateeoi().
+ * NOTE(review): the other two are presumably bits of the same field - confirm.
+ */
+#define _PDEVF_op_active (0)
+#define PDEVF_op_active (1<<(_PDEVF_op_active))
+#define _PCIB_op_pending (1)
+#define PCIB_op_pending (1<<(_PCIB_op_pending))
+#define _EOI_pending (2)
+#define EOI_pending (1<<(_EOI_pending))
+
+/*
+ * Per-backend-instance state.
+ * NOTE(review): field descriptions marked "presumably" are inferred from
+ * names and types only - confirm against xenbus.c / pciback_ops.c.
+ */
+struct xen_pcibk_device {
+ void *pci_dev_data; /* presumably private data of the vpci/passthrough backend */
+ struct mutex dev_lock;
+ struct xenbus_device *xdev;
+ struct xenbus_watch be_watch;
+ u8 be_watching; /* presumably non-zero while be_watch is registered */
+ int evtchn_irq; /* IRQ passed to xen_irq_lateeoi() by xen_pcibk_lateeoi() */
+ struct xen_pci_sharedinfo *sh_info; /* shared info; NULL is treated as "not connected" by the AER handlers */
+ unsigned long flags; /* bit flags, e.g. _EOI_pending */
+ struct work_struct op_work;
+ struct xen_pci_op op;
+};
+
+/*
+ * Per-PCI-device driver data (what pci_get_drvdata() returns for devices
+ * owned by xen-pciback).  The IRQ name is stored inline after the fixed
+ * part, so the structure must be allocated with room for that string.
+ */
+struct xen_pcibk_dev_data {
+	struct list_head config_fields;
+	struct pci_saved_state *pci_saved_state;
+	unsigned int permissive:1;
+	unsigned int warned_on_write:1;
+	unsigned int enable_intx:1;
+	unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
+	unsigned int ack_intr:1; /* .. and ACK-ing */
+	unsigned long handled;
+	unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
+	/* C99 flexible array member; must remain the last member. */
+	char irq_name[]; /* xen-pcibk[000:04:00.0] */
+};
+
+/* Used by XenBus and xen_pcibk_ops.c */
+extern wait_queue_head_t xen_pcibk_aer_wait_queue;
+/* Used by pcistub.c and conf_space_quirks.c */
+extern struct list_head xen_pcibk_quirks;
+
+/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
+struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
+ int domain, int bus,
+ int slot, int func);
+struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev);
+void pcistub_put_pci_dev(struct pci_dev *dev);
+
+/* Ensure a device is turned off or reset */
+void xen_pcibk_reset_device(struct pci_dev *pdev);
+
+/* Access a virtual configuration space for a PCI device */
+int xen_pcibk_config_init(void);
+int xen_pcibk_config_init_dev(struct pci_dev *dev);
+void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev);
+void xen_pcibk_config_reset_dev(struct pci_dev *dev);
+void xen_pcibk_config_free_dev(struct pci_dev *dev);
+int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
+ u32 *ret_val);
+int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size,
+ u32 value);
+
+/* Handle requests for specific devices from the frontend */
+typedef int (*publish_pci_dev_cb) (struct xen_pcibk_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn, unsigned int devid);
+typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
+ unsigned int domain, unsigned int bus);
+
+/* Backend registration for the two types of BDF representation:
+ * vpci - BDFs start at 00
+ * passthrough - BDFs are exactly like in the host.
+ *
+ * Every hook is optional: the inline wrappers in this header check both the
+ * selected backend and the individual hook for NULL before calling it.
+ */
+struct xen_pcibk_backend {
+ const char *name;
+ /* called through xen_pcibk_init_devices() */
+ int (*init)(struct xen_pcibk_device *pdev);
+ /* called through xen_pcibk_release_devices() */
+ void (*free)(struct xen_pcibk_device *pdev);
+ /* called through xen_pcibk_get_pcifront_dev() */
+ int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
+ unsigned int *domain, unsigned int *bus,
+ unsigned int *devfn);
+ /* called through xen_pcibk_publish_pci_roots() */
+ int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
+ /* called through xen_pcibk_release_pci_dev() */
+ void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
+ bool lock);
+ /* called through xen_pcibk_add_pci_dev() */
+ int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
+ int devid, publish_pci_dev_cb publish_cb);
+ /* called through xen_pcibk_get_pci_dev() */
+ struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn);
+};
+
+extern const struct xen_pcibk_backend xen_pcibk_vpci_backend;
+extern const struct xen_pcibk_backend xen_pcibk_passthrough_backend;
+extern const struct xen_pcibk_backend *xen_pcibk_backend;
+
+/*
+ * Call the selected backend's ->add hook.  Returns the hook's result, or -1
+ * when no backend is selected or the hook is missing.
+ */
+static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
+					struct pci_dev *dev,
+					int devid,
+					publish_pci_dev_cb publish_cb)
+{
+	if (!xen_pcibk_backend || !xen_pcibk_backend->add)
+		return -1;
+
+	return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
+}
+
+/*
+ * Call the selected backend's ->release hook; a no-op when no backend is
+ * selected or the hook is missing.
+ */
+static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
+					     struct pci_dev *dev, bool lock)
+{
+	/*
+	 * Plain call: ISO C does not allow "return <expression>;" in a
+	 * function returning void, even if the expression is itself void.
+	 */
+	if (xen_pcibk_backend && xen_pcibk_backend->release)
+		xen_pcibk_backend->release(pdev, dev, lock);
+}
+
+/*
+ * Call the selected backend's ->get hook.  Returns the hook's result, or
+ * NULL when no backend is selected or the hook is missing.
+ */
+static inline struct pci_dev *
+xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
+		      unsigned int bus, unsigned int devfn)
+{
+	if (!xen_pcibk_backend || !xen_pcibk_backend->get)
+		return NULL;
+
+	return xen_pcibk_backend->get(pdev, domain, bus, devfn);
+}
+
+/*
+ * Added for domain0 PCIe AER handling: get the guest domain/bus/devfn in
+ * xen_pcibk before sending an AER request to pcifront, so that the guest can
+ * identify the device and cooperate with xen_pcibk to finish the AER
+ * recovery job if the device driver has that capability.
+ *
+ * Calls the selected backend's ->find hook.  Returns the hook's result, or
+ * -1 when no backend is selected or the hook is missing.
+ */
+static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
+					     struct xen_pcibk_device *pdev,
+					     unsigned int *domain,
+					     unsigned int *bus,
+					     unsigned int *devfn)
+{
+	if (!xen_pcibk_backend || !xen_pcibk_backend->find)
+		return -1;
+
+	return xen_pcibk_backend->find(pcidev, pdev, domain, bus, devfn);
+}
+
+/*
+ * Call the selected backend's ->init hook.  Returns the hook's result, or -1
+ * when no backend is selected or the hook is missing.
+ */
+static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
+{
+	if (!xen_pcibk_backend || !xen_pcibk_backend->init)
+		return -1;
+
+	return xen_pcibk_backend->init(pdev);
+}
+
+/*
+ * Call the selected backend's ->publish hook with @cb.  Returns the hook's
+ * result, or -1 when no backend is selected or the hook is missing.
+ */
+static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
+					      publish_pci_root_cb cb)
+{
+	if (!xen_pcibk_backend || !xen_pcibk_backend->publish)
+		return -1;
+
+	return xen_pcibk_backend->publish(pdev, cb);
+}
+
+/*
+ * Call the selected backend's ->free hook; a no-op when no backend is
+ * selected or the hook is missing.
+ */
+static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
+{
+	/*
+	 * Plain call: ISO C does not allow "return <expression>;" in a
+	 * function returning void, even if the expression is itself void.
+	 */
+	if (xen_pcibk_backend && xen_pcibk_backend->free)
+		xen_pcibk_backend->free(pdev);
+}
+
+/* Handles events from front-end */
+irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
+void xen_pcibk_do_op(struct work_struct *data);
+
+/*
+ * Signal a late EOI on the device's event-channel IRQ, but only if the
+ * _EOI_pending flag was set; the flag is cleared atomically by the test.
+ */
+static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev,
+				     unsigned int eoi_flag)
+{
+	if (!test_and_clear_bit(_EOI_pending, &pdev->flags))
+		return;
+
+	xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag);
+}
+
+int xen_pcibk_xenbus_register(void);
+void xen_pcibk_xenbus_unregister(void);
+
+extern int verbose_request;
+#endif
+
+/* Handles shared IRQs that can go to both the device domain and the control domain. */
+void xen_pcibk_irq_handler(struct pci_dev *dev, int reset);
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
new file mode 100644
index 000000000..c4ed2c634
--- /dev/null
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Backend Operations - respond to PCI requests from Frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/moduleparam.h>
+#include <linux/wait.h>
+#include <linux/bitops.h>
+#include <xen/events.h>
+#include <linux/sched.h>
+#include "pciback.h"
+
+/*
+ * When non-zero, the MSI/MSI-X request handlers in this file emit a
+ * KERN_DEBUG line per frontend request. Exposed as a module parameter
+ * (mode 0644).
+ */
+int verbose_request;
+module_param(verbose_request, int, 0644);
+
+static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);
+
+/* Ensure a device has the fake IRQ handler "turned on/off" and is
+ * ready to be exported. This MUST be run after xen_pcibk_reset_device
+ * which does the actual PCI device enable/disable.
+ *
+ * NOTE(review): xen_pcibk_reset_device() itself calls this function as its
+ * first step, before disabling the device - confirm the intended ordering.
+ *
+ * @dev:   device whose fake legacy-IRQ handler is brought in line with
+ *         dev_data->enable_intx.
+ * @reset: non-zero clears enable_intx and ack_intr first, so the handler
+ *         (if installed) is removed.
+ *
+ * Does nothing for devices without driver data or with a non-normal header
+ * type (bridges).
+ */
+static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
+{
+ struct xen_pcibk_dev_data *dev_data;
+ int rc;
+ int enable = 0;
+
+ dev_data = pci_get_drvdata(dev);
+ if (!dev_data)
+ return;
+
+ /* We don't deal with bridges */
+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+ return;
+
+ if (reset) {
+ dev_data->enable_intx = 0;
+ dev_data->ack_intr = 0;
+ }
+ enable = dev_data->enable_intx;
+
+ /* Asked to disable, but the ISR isn't running: nothing to do */
+ if (!enable && !dev_data->isr_on)
+ return;
+
+ /* Squirrel away the IRQs in the dev_data. We need this
+ * b/c when device transitions to MSI, the dev->irq is
+ * overwritten with the MSI vector.
+ */
+ if (enable)
+ dev_data->irq = dev->irq;
+
+ /*
+ * SR-IOV devices generally use MSI-X and have no legacy
+ * interrupts, so inhibit creating a fake IRQ handler for them.
+ */
+ if (dev_data->irq == 0)
+ goto out;
+
+ /* Log the current ISR state and the state being requested. */
+ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
+ dev_data->irq_name,
+ dev_data->irq,
+ pci_is_enabled(dev) ? "on" : "off",
+ dev->msi_enabled ? "MSI" : "",
+ dev->msix_enabled ? "MSI/X" : "",
+ dev_data->isr_on ? "enable" : "disable",
+ enable ? "enable" : "disable");
+
+ if (enable) {
+ /*
+ * The MSI or MSI-X should not have an IRQ handler. Otherwise
+ * if the guest terminates we BUG_ON in free_msi_irqs.
+ */
+ if (dev->msi_enabled || dev->msix_enabled)
+ goto out;
+
+ /* dev is the cookie; free_irq() below must pass the same one. */
+ rc = request_irq(dev_data->irq,
+ xen_pcibk_guest_interrupt, IRQF_SHARED,
+ dev_data->irq_name, dev);
+ if (rc) {
+ dev_err(&dev->dev, "%s: failed to install fake IRQ " \
+ "handler for IRQ %d! (rc:%d)\n",
+ dev_data->irq_name, dev_data->irq, rc);
+ goto out;
+ }
+ } else {
+ free_irq(dev_data->irq, dev);
+ dev_data->irq = 0;
+ }
+ /* Only reached on success: record the new handler state. */
+ dev_data->isr_on = enable;
+ dev_data->ack_intr = enable;
+out:
+ /* Report the outcome; isr_on != enable here means the change failed. */
+ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
+ dev_data->irq_name,
+ dev_data->irq,
+ pci_is_enabled(dev) ? "on" : "off",
+ dev->msi_enabled ? "MSI" : "",
+ dev->msix_enabled ? "MSI/X" : "",
+ enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
+ (dev_data->isr_on ? "failed to disable" : "disabled"));
+}
+
+/*
+ * Put a device into the "turned off" state so it can be (re-)exported.
+ * The virtual configuration space is handled separately; see
+ * xen_pcibk_config_reset.
+ *
+ * Normal devices get MSI/MSI-X torn down and are disabled; for any other
+ * header type (bridges) only PCI_COMMAND_INVALIDATE is cleared.
+ */
+void xen_pcibk_reset_device(struct pci_dev *dev)
+{
+	u16 command;
+
+	xen_pcibk_control_isr(dev, 1 /* reset device */);
+
+	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
+		/* Bridges are not disabled; just drop the invalidate bit. */
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		if (!(command & PCI_COMMAND_INVALIDATE))
+			return;
+
+		command &= ~PCI_COMMAND_INVALIDATE;
+		pci_write_config_word(dev, PCI_COMMAND, command);
+		dev->is_busmaster = 0;
+		return;
+	}
+
+#ifdef CONFIG_PCI_MSI
+	/*
+	 * The guest may have been killed abruptly without ever disabling
+	 * its MSI/MSI-X interrupts, so do it on its behalf.
+	 */
+	if (dev->msix_enabled)
+		pci_disable_msix(dev);
+	if (dev->msi_enabled)
+		pci_disable_msi(dev);
+#endif
+	if (pci_is_enabled(dev))
+		pci_disable_device(dev);
+
+	dev->is_busmaster = 0;
+}
+
+#ifdef CONFIG_PCI_MSI
+/*
+ * Handle XEN_PCI_OP_enable_msi for @dev on behalf of the frontend.
+ *
+ * On success op->value carries the pirq for dev->irq (0 if dev->irq is 0),
+ * the fake INTx acknowledgement is switched off and 0 is returned. On any
+ * failure op->value is zeroed and XEN_PCI_ERR_op_failed is returned.
+ */
+static
+int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
+			 struct pci_dev *dev, struct xen_pci_op *op)
+{
+	struct xen_pcibk_dev_data *data;
+	int rc;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
+
+	/* Refuse a second enable, and refuse MSI while MSI-X is active. */
+	rc = dev->msi_enabled ? -EALREADY :
+	     dev->msix_enabled ? -ENXIO : pci_enable_msi(dev);
+	if (rc != 0) {
+		pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
+				    pci_name(dev), pdev->xdev->otherend_id,
+				    rc);
+		op->value = 0;
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	/*
+	 * The value the guest needs is actually the IDT vector, not the
+	 * local domain's IRQ number.
+	 */
+	if (dev->irq)
+		op->value = xen_pirq_from_irq(dev->irq);
+	else
+		op->value = 0;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
+		       op->value);
+
+	data = pci_get_drvdata(dev);
+	if (data != NULL)
+		data->ack_intr = 0;
+
+	return 0;
+}
+
+/*
+ * Handle XEN_PCI_OP_disable_msi for @dev.
+ *
+ * If MSI is currently enabled it is disabled and INTx acknowledgement is
+ * switched back on. In every case op->value is set to the pirq of dev->irq
+ * (0 if dev->irq is 0) and 0 is returned.
+ */
+static
+int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
+			  struct pci_dev *dev, struct xen_pci_op *op)
+{
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
+		       pci_name(dev));
+
+	if (dev->msi_enabled) {
+		struct xen_pcibk_dev_data *data;
+
+		pci_disable_msi(dev);
+
+		data = pci_get_drvdata(dev);
+		if (data != NULL)
+			data->ack_intr = 1;
+	}
+
+	if (dev->irq)
+		op->value = xen_pirq_from_irq(dev->irq);
+	else
+		op->value = 0;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
+		       op->value);
+
+	return 0;
+}
+
+/*
+ * Handle XEN_PCI_OP_enable_msix for @dev.
+ *
+ * op->value is the number of MSI-X entries requested by the frontend and
+ * op->msix_entries[] describes them. On success each non-zero vector is
+ * translated to a pirq and written back into op->msix_entries[].
+ *
+ * Returns -EINVAL if more than SH_INFO_MAX_VEC entries are requested,
+ * -EALREADY if MSI-X is already enabled, -ENXIO if MSI is enabled or memory
+ * decoding is off, -ENOMEM on allocation failure, otherwise the (non-positive)
+ * result of pci_enable_msix_exact(). op->value is overwritten with that
+ * result once the enable call has been attempted.
+ */
+static
+int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+{
+ struct xen_pcibk_dev_data *dev_data;
+ int i, result;
+ struct msix_entry *entries;
+ u16 cmd;
+
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
+ pci_name(dev));
+
+ /* Bound the frontend-supplied count before it sizes any array access. */
+ if (op->value > SH_INFO_MAX_VEC)
+ return -EINVAL;
+
+ if (dev->msix_enabled)
+ return -EALREADY;
+
+ /*
+ * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able
+ * to access the BARs where the MSI-X entries reside.
+ * But VF devices are unique in which the PF needs to be checked.
+ */
+ pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd);
+ if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY))
+ return -ENXIO;
+
+ entries = kmalloc_array(op->value, sizeof(*entries), GFP_KERNEL);
+ if (entries == NULL)
+ return -ENOMEM;
+
+ /* Copy the request into a kernel-side table for the PCI core. */
+ for (i = 0; i < op->value; i++) {
+ entries[i].entry = op->msix_entries[i].entry;
+ entries[i].vector = op->msix_entries[i].vector;
+ }
+
+ result = pci_enable_msix_exact(dev, entries, op->value);
+ if (result == 0) {
+ /* Report back pirqs rather than local IRQ numbers. */
+ for (i = 0; i < op->value; i++) {
+ op->msix_entries[i].entry = entries[i].entry;
+ if (entries[i].vector) {
+ op->msix_entries[i].vector =
+ xen_pirq_from_irq(entries[i].vector);
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: " \
+ "MSI-X[%d]: %d\n",
+ pci_name(dev), i,
+ op->msix_entries[i].vector);
+ }
+ }
+ } else
+ pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n",
+ pci_name(dev), pdev->xdev->otherend_id,
+ result);
+ kfree(entries);
+
+ /* From here on op->value no longer holds the entry count. */
+ op->value = result;
+ /* Note: ack_intr is cleared whether or not the enable succeeded. */
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ dev_data->ack_intr = 0;
+
+ return result > 0 ? 0 : result;
+}
+
+/*
+ * Handle XEN_PCI_OP_disable_msix for @dev.
+ *
+ * If MSI-X is currently enabled it is disabled and INTx acknowledgement is
+ * switched back on. In every case op->value is set to the pirq of dev->irq
+ * (0 if dev->irq is 0) and 0 is returned.
+ */
+static
+int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
+			   struct pci_dev *dev, struct xen_pci_op *op)
+{
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
+		       pci_name(dev));
+
+	if (dev->msix_enabled) {
+		struct xen_pcibk_dev_data *data;
+
+		pci_disable_msix(dev);
+
+		data = pci_get_drvdata(dev);
+		if (data != NULL)
+			data->ack_intr = 1;
+	}
+
+	/*
+	 * SR-IOV devices (which don't have any legacy IRQ) have
+	 * an undefined IRQ value of zero.
+	 */
+	if (dev->irq)
+		op->value = xen_pirq_from_irq(dev->irq);
+	else
+		op->value = 0;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n",
+		       pci_name(dev), op->value);
+
+	return 0;
+}
+#endif
+
+/*
+ * Returns true when the frontend has flagged a request (_XEN_PCIF_active in
+ * the shared page) and this call was the one that claimed it by setting
+ * _PDEVF_op_active. The claim bit is only touched if a request is flagged.
+ */
+static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev)
+{
+	unsigned long *shared_flags = (unsigned long *)&pdev->sh_info->flags;
+
+	if (!test_bit(_XEN_PCIF_active, shared_flags))
+		return false;
+
+	return !test_and_set_bit(_PDEVF_op_active, &pdev->flags);
+}
+
+/*
+ * The same evtchn is used both for pcifront conf_read_write requests and for
+ * the frontend's PCIe AER acknowledgement, so one event may mean either.
+ * The original note here says a separate work queue is used for the
+ * conf_read_write service to avoid conflict with the aer_core do_recovery
+ * job, which also uses the system default work queue.
+ *
+ * NOTE(review): the code below queues op_work with schedule_work(); confirm
+ * whether the "new work_queue" statement still matches the implementation.
+ *
+ * If neither a request nor an awaited AER ack is found, the event is
+ * EOI'd as spurious.
+ */
+static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
+{
+ bool eoi = true;
+
+ /* Check that frontend is requesting an operation and that we are not
+ * already processing a request */
+ if (xen_pcibk_test_op_pending(pdev)) {
+ schedule_work(&pdev->op_work);
+ eoi = false;
+ }
+ /* _XEN_PCIB_active should have been cleared by pcifront. Also make
+ sure xen_pcibk is waiting for the ack by checking _PCIB_op_pending */
+ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
+ && test_bit(_PCIB_op_pending, &pdev->flags)) {
+ wake_up(&xen_pcibk_aer_wait_queue);
+ eoi = false;
+ }
+
+ /* EOI if there was nothing to do. */
+ if (eoi)
+ xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS);
+}
+
+/* Performing the configuration space reads/writes must not be done in atomic
+ * context because some of the pci_* functions can sleep (mostly due to ACPI
+ * use of semaphores). This function is intended to be called from a work
+ * queue in process context taking a struct xen_pcibk_device as a parameter.
+ *
+ * Processes exactly one request: copies it out of the shared page, executes
+ * it against the matching device, writes err/value (and MSI-X vectors) back,
+ * then clears _XEN_PCIF_active, notifies the frontend and finally releases
+ * _PDEVF_op_active.
+ */
+
+static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev)
+{
+ struct pci_dev *dev;
+ struct xen_pcibk_dev_data *dev_data = NULL;
+ struct xen_pci_op *op = &pdev->op;
+ int test_intx = 0;
+#ifdef CONFIG_PCI_MSI
+ unsigned int nr = 0;
+#endif
+
+ /* Work on a private copy; the compiler barrier keeps later accesses
+ * from being turned back into reads of the shared page. */
+ *op = pdev->sh_info->op;
+ barrier();
+ dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
+
+ if (dev == NULL)
+ op->err = XEN_PCI_ERR_dev_not_found;
+ else {
+ dev_data = pci_get_drvdata(dev);
+ /* Remember enable_intx so a change caused by this op is detected. */
+ if (dev_data)
+ test_intx = dev_data->enable_intx;
+ switch (op->cmd) {
+ case XEN_PCI_OP_conf_read:
+ op->err = xen_pcibk_config_read(dev,
+ op->offset, op->size, &op->value);
+ break;
+ case XEN_PCI_OP_conf_write:
+ op->err = xen_pcibk_config_write(dev,
+ op->offset, op->size, op->value);
+ break;
+#ifdef CONFIG_PCI_MSI
+ case XEN_PCI_OP_enable_msi:
+ op->err = xen_pcibk_enable_msi(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_disable_msi:
+ op->err = xen_pcibk_disable_msi(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_enable_msix:
+ /* Save the count: enable_msix overwrites op->value. */
+ nr = op->value;
+ op->err = xen_pcibk_enable_msix(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_disable_msix:
+ op->err = xen_pcibk_disable_msix(pdev, dev, op);
+ break;
+#endif
+ default:
+ op->err = XEN_PCI_ERR_not_implemented;
+ break;
+ }
+ }
+ if (!op->err && dev && dev_data) {
+ /* Transition detected */
+ if ((dev_data->enable_intx != test_intx))
+ xen_pcibk_control_isr(dev, 0 /* no reset */);
+ }
+ /* Publish only the result fields back to the shared page. */
+ pdev->sh_info->op.err = op->err;
+ pdev->sh_info->op.value = op->value;
+#ifdef CONFIG_PCI_MSI
+ if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) {
+ unsigned int i;
+
+ for (i = 0; i < nr; i++)
+ pdev->sh_info->op.msix_entries[i].vector =
+ op->msix_entries[i].vector;
+ }
+#endif
+ /* Tell the driver domain that we're done. */
+ wmb();
+ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
+ notify_remote_via_irq(pdev->evtchn_irq);
+
+ /* Mark that we're done. */
+ smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
+ clear_bit(_PDEVF_op_active, &pdev->flags);
+ smp_mb__after_atomic(); /* /before/ final check for work */
+}
+
+/*
+ * Work handler for pdev->op_work: services the request that caused the work
+ * to be queued, keeps going for as long as further requests can be claimed,
+ * and then issues the late EOI for the event channel.
+ */
+void xen_pcibk_do_op(struct work_struct *data)
+{
+	struct xen_pcibk_device *pdev;
+
+	pdev = container_of(data, struct xen_pcibk_device, op_work);
+
+	for (;;) {
+		xen_pcibk_do_one_op(pdev);
+		if (!xen_pcibk_test_op_pending(pdev))
+			break;
+	}
+
+	xen_pcibk_lateeoi(pdev, 0);
+}
+
+/*
+ * Event-channel interrupt handler for frontend notifications. Marks an EOI
+ * as pending (warning if one already was) and hands over to
+ * xen_pcibk_test_and_schedule_op() to decide what the event means.
+ */
+irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
+{
+	struct xen_pcibk_device *pdev = dev_id;
+	bool already_pending;
+
+	/* IRQs might come in before pdev->evtchn_irq is written. */
+	if (unlikely(irq != pdev->evtchn_irq))
+		pdev->evtchn_irq = irq;
+
+	already_pending = test_and_set_bit(_EOI_pending, &pdev->flags);
+	WARN(already_pending, "IRQ while EOI pending\n");
+
+	xen_pcibk_test_and_schedule_op(pdev);
+
+	return IRQ_HANDLED;
+}
+/*
+ * Fake handler installed on a passed-through device's legacy IRQ line.
+ * Claims the interrupt only while both isr_on and ack_intr are set. Every
+ * 1000th claimed interrupt it consults xen_test_irq_shared() and, if that
+ * returns non-zero, stops acknowledging.
+ */
+static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
+{
+	struct pci_dev *dev = (struct pci_dev *)dev_id;
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+
+	if (!dev_data->isr_on || !dev_data->ack_intr)
+		return IRQ_NONE;
+
+	dev_data->handled++;
+	if ((dev_data->handled % 1000) == 0 && xen_test_irq_shared(irq)) {
+		pr_info("%s IRQ line is not shared "
+			"with other domains. Turning ISR off\n",
+			dev_data->irq_name);
+		dev_data->ack_intr = 0;
+	}
+
+	return IRQ_HANDLED;
+}
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
new file mode 100644
index 000000000..30313084f
--- /dev/null
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
+ * to the frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include "pciback.h"
+
+#define PCI_SLOT_MAX 32
+
+/*
+ * Per-frontend state of the virtual PCI bus: one list of exported devices
+ * per virtual slot (the functions sharing that slot).
+ */
+struct vpci_dev_data {
+ /* Access to dev_list must be protected by lock */
+ struct list_head dev_list[PCI_SLOT_MAX];
+ struct mutex lock;
+};
+
+/* Return the node following @head (@head itself if the list is empty). */
+static inline struct list_head *list_first(struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	return first;
+}
+
+/*
+ * Look up the physical device exported at virtual address
+ * @domain:@bus:@devfn. The virtual bus only exists at domain 0, bus 0; the
+ * slot part of @devfn selects the list and the function number is matched
+ * against the physical device's function. Returns NULL if nothing matches.
+ */
+static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
+					       unsigned int domain,
+					       unsigned int bus,
+					       unsigned int devfn)
+{
+	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+	struct pci_dev *match = NULL;
+	struct pci_dev_entry *cur;
+
+	if (domain != 0 || bus != 0)
+		return NULL;
+
+	if (PCI_SLOT(devfn) >= PCI_SLOT_MAX)
+		return match;
+
+	mutex_lock(&vpci_dev->lock);
+
+	list_for_each_entry(cur, &vpci_dev->dev_list[PCI_SLOT(devfn)], list) {
+		if (PCI_FUNC(cur->dev->devfn) != PCI_FUNC(devfn))
+			continue;
+
+		match = cur->dev;
+		break;
+	}
+
+	mutex_unlock(&vpci_dev->lock);
+
+	return match;
+}
+
+/*
+ * Returns 1 when @l and @r sit in the same physical slot (same domain, same
+ * bus, same slot number), 0 otherwise.
+ */
+static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
+{
+	return pci_domain_nr(l->bus) == pci_domain_nr(r->bus) &&
+	       l->bus == r->bus &&
+	       PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn);
+}
+
+/*
+ * Place @dev on the virtual PCI bus and publish its virtual address.
+ *
+ * Functions of a multi-function device are grouped into the virtual slot
+ * already holding a sibling from the same physical slot; otherwise the first
+ * empty virtual slot is used. The physical function number is preserved.
+ *
+ * Returns 0 or the result of @publish_cb on success paths, -EFAULT for
+ * bridges, -ENOMEM if the entry cannot be allocated or all PCI_SLOT_MAX
+ * slots are taken. The failures detected here are also reported through
+ * xenbus_dev_fatal().
+ */
+static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev, int devid,
+ publish_pci_dev_cb publish_cb)
+{
+ int err = 0, slot, func = PCI_FUNC(dev->devfn);
+ struct pci_dev_entry *t, *dev_entry;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+
+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
+ err = -EFAULT;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Can't export bridges on the virtual PCI bus");
+ goto out;
+ }
+
+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
+ if (!dev_entry) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error adding entry to virtual PCI bus");
+ goto out;
+ }
+
+ dev_entry->dev = dev;
+
+ mutex_lock(&vpci_dev->lock);
+
+ /*
+ * Keep multi-function devices together on the virtual PCI bus, except
+ * that we want to keep virtual functions at func 0 on their own. They
+ * aren't multi-function devices and hence their presence at func 0
+ * may cause guests to not scan the other functions.
+ */
+ if (!dev->is_virtfn || func) {
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (list_empty(&vpci_dev->dev_list[slot]))
+ continue;
+
+ /* Only the first entry of a slot is compared. */
+ t = list_entry(list_first(&vpci_dev->dev_list[slot]),
+ struct pci_dev_entry, list);
+ if (t->dev->is_virtfn && !PCI_FUNC(t->dev->devfn))
+ continue;
+
+ if (match_slot(dev, t->dev)) {
+ pr_info("vpci: %s: assign to virtual slot %d func %d\n",
+ pci_name(dev), slot,
+ func);
+ list_add_tail(&dev_entry->list,
+ &vpci_dev->dev_list[slot]);
+ goto unlock;
+ }
+ }
+ }
+
+ /* Assign to a new slot on the virtual PCI bus */
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (list_empty(&vpci_dev->dev_list[slot])) {
+ pr_info("vpci: %s: assign to virtual slot %d\n",
+ pci_name(dev), slot);
+ list_add_tail(&dev_entry->list,
+ &vpci_dev->dev_list[slot]);
+ goto unlock;
+ }
+ }
+
+ /* Fell out of both loops: every virtual slot is occupied. */
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "No more space on root virtual PCI bus");
+
+unlock:
+ mutex_unlock(&vpci_dev->lock);
+
+ /* Publish this device. */
+ if (!err)
+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
+ else
+ kfree(dev_entry);
+ /* NOTE(review): if publish_cb fails the entry stays on dev_list. */
+
+out:
+ return err;
+}
+
+/*
+ * Remove @dev from the virtual PCI bus (if present) and hand it back via
+ * pcistub_put_pci_dev().
+ *
+ * @lock: when true, the device lock is taken around pcistub_put_pci_dev().
+ */
+static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev, bool lock)
+{
+ int slot;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+ struct pci_dev *found_dev = NULL;
+
+ mutex_lock(&vpci_dev->lock);
+
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ struct pci_dev_entry *e;
+
+ list_for_each_entry(e, &vpci_dev->dev_list[slot], list) {
+ if (e->dev == dev) {
+ /* Leaving the loop right after the removal keeps the
+ * non-safe iterator from touching the freed entry. */
+ list_del(&e->list);
+ found_dev = e->dev;
+ kfree(e);
+ goto out;
+ }
+ }
+ }
+
+out:
+ mutex_unlock(&vpci_dev->lock);
+
+ /* The put happens after vpci_dev->lock has been dropped. */
+ if (found_dev) {
+ if (lock)
+ device_lock(&found_dev->dev);
+ pcistub_put_pci_dev(found_dev);
+ if (lock)
+ device_unlock(&found_dev->dev);
+ }
+}
+
+/*
+ * Allocate the virtual-bus bookkeeping for @pdev with every slot empty and
+ * attach it as pdev->pci_dev_data. Returns 0, or -ENOMEM if the allocation
+ * fails.
+ */
+static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
+{
+	struct vpci_dev_data *bus_data;
+	int i;
+
+	bus_data = kmalloc(sizeof(*bus_data), GFP_KERNEL);
+	if (bus_data == NULL)
+		return -ENOMEM;
+
+	mutex_init(&bus_data->lock);
+
+	for (i = 0; i < PCI_SLOT_MAX; i++)
+		INIT_LIST_HEAD(&bus_data->dev_list[i]);
+
+	pdev->pci_dev_data = bus_data;
+
+	return 0;
+}
+
+/*
+ * Report the roots of the virtual PCI bus through @publish_cb. There is
+ * only ever one: domain 0, bus 0.
+ */
+static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
+					 publish_pci_root_cb publish_cb)
+{
+	int rc = publish_cb(pdev, 0, 0);
+
+	return rc;
+}
+
+/*
+ * Tear down the whole virtual PCI bus of @pdev: every exported device is
+ * unlinked and returned with pcistub_put_pci_dev() under its device lock,
+ * then the bookkeeping structure itself is freed.
+ *
+ * Note that dev_list is walked here without taking vpci_dev->lock.
+ */
+static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
+{
+ int slot;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ struct pci_dev_entry *e, *tmp;
+ /* _safe variant: entries are deleted and freed while iterating. */
+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
+ list) {
+ struct pci_dev *dev = e->dev;
+ list_del(&e->list);
+ device_lock(&dev->dev);
+ pcistub_put_pci_dev(dev);
+ device_unlock(&dev->dev);
+ kfree(e);
+ }
+ }
+
+ kfree(vpci_dev);
+ pdev->pci_dev_data = NULL;
+}
+
+/*
+ * Reverse lookup: find the virtual address under which the physical device
+ * @pcidev is exported to the frontend. On a match *domain and *bus are set
+ * to 0 and *devfn to (virtual slot, physical function). Returns 1 if the
+ * device was found, 0 otherwise (outputs are then left untouched).
+ */
+static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
+					struct xen_pcibk_device *pdev,
+					unsigned int *domain, unsigned int *bus,
+					unsigned int *devfn)
+{
+	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+	struct pci_dev_entry *cur;
+	int slot;
+	int found = 0;
+
+	mutex_lock(&vpci_dev->lock);
+	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+		list_for_each_entry(cur, &vpci_dev->dev_list[slot], list) {
+			struct pci_dev *cand = cur->dev;
+
+			if (!cand)
+				continue;
+			if (cand->bus->number != pcidev->bus->number)
+				continue;
+			if (pci_domain_nr(cand->bus) !=
+			    pci_domain_nr(pcidev->bus))
+				continue;
+			if (cand->devfn != pcidev->devfn)
+				continue;
+
+			found = 1;
+			*domain = 0;
+			*bus = 0;
+			*devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn));
+		}
+	}
+	mutex_unlock(&vpci_dev->lock);
+
+	return found;
+}
+
+/*
+ * Operations table of the "vpci" backend: wires the virtual-PCI-bus
+ * implementations in this file into the generic xen_pcibk_backend hooks.
+ */
+const struct xen_pcibk_backend xen_pcibk_vpci_backend = {
+ .name = "vpci",
+ .init = __xen_pcibk_init_devices,
+ .free = __xen_pcibk_release_devices,
+ .find = __xen_pcibk_get_pcifront_dev,
+ .publish = __xen_pcibk_publish_pci_roots,
+ .release = __xen_pcibk_release_pci_dev,
+ .add = __xen_pcibk_add_pci_dev,
+ .get = __xen_pcibk_get_pci_dev,
+};
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
new file mode 100644
index 000000000..4fb6aacf9
--- /dev/null
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -0,0 +1,755 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Backend Xenbus Setup - handles setup with frontend and xend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <asm/xen/pci.h>
+#include "pciback.h"
+
+#define INVALID_EVTCHN_IRQ (-1)
+
+/*
+ * Read-only (S_IRUGO) module parameter choosing how the PCI topology is
+ * presented to the guest; see the description text below for the two modes.
+ */
+static bool __read_mostly passthrough;
+module_param(passthrough, bool, S_IRUGO);
+MODULE_PARM_DESC(passthrough,
+ "Option to specify how to export PCI topology to guest:\n"\
+ " 0 - (default) Hide the true PCI topology and make the frontend\n"\
+ " believe there is a single PCI bus with only the exported devices on it.\n"\
+ " For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\
+ " while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\
+ " 1 - Passthrough provides a real view of the PCI topology to the\n"\
+ " frontend (for example, a device at 06:01.b will still appear at\n"\
+ " 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
+ " exposed PCI devices to its driver domains. This may be required\n"\
+ " for drivers which depend on finding their hardware in certain\n"\
+ " bus/slot locations.");
+
+/*
+ * Allocate and initialise the backend state for @xdev and store it as the
+ * xenbus device's driver data. Returns NULL if the allocation fails or the
+ * backend cannot initialise its device list; in the latter case the driver
+ * data is set to NULL.
+ */
+static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
+{
+	struct xen_pcibk_device *pdev;
+
+	pdev = kzalloc(sizeof(*pdev), GFP_KERNEL);
+	if (!pdev)
+		return NULL;
+
+	dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
+
+	pdev->xdev = xdev;
+	pdev->sh_info = NULL;
+	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+	pdev->be_watching = 0;
+
+	mutex_init(&pdev->dev_lock);
+	INIT_WORK(&pdev->op_work, xen_pcibk_do_op);
+
+	if (xen_pcibk_init_devices(pdev) != 0) {
+		kfree(pdev);
+		pdev = NULL;
+	}
+
+	/* Also reached on init failure, which stores NULL. */
+	dev_set_drvdata(&xdev->dev, pdev);
+
+	return pdev;
+}
+
+/*
+ * Detach from the frontend's resources: unbind the event channel, wait for
+ * any queued request work to finish, then unmap the shared page. Safe to
+ * call repeatedly; each resource is only released if currently held.
+ */
+static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
+{
+ mutex_lock(&pdev->dev_lock);
+ /* Ensure the guest can't trigger our handler before removing devices */
+ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
+ unbind_from_irqhandler(pdev->evtchn_irq, pdev);
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ }
+
+ /* If the driver domain started an op, make sure we complete it
+ * before releasing the shared memory */
+
+ flush_work(&pdev->op_work);
+
+ if (pdev->sh_info != NULL) {
+ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
+ pdev->sh_info = NULL;
+ }
+ mutex_unlock(&pdev->dev_lock);
+}
+
+/*
+ * Destroy @pdev: stop the backend watch, disconnect from the frontend,
+ * release all exported devices, clear the xenbus driver data and free the
+ * structure.
+ */
+static void free_pdev(struct xen_pcibk_device *pdev)
+{
+ if (pdev->be_watching) {
+ unregister_xenbus_watch(&pdev->be_watch);
+ pdev->be_watching = 0;
+ }
+
+ xen_pcibk_disconnect(pdev);
+
+ /* N.B. This calls pcistub_put_pci_dev which does the FLR on all
+ * of the PCIe devices. */
+ xen_pcibk_release_devices(pdev);
+
+ dev_set_drvdata(&pdev->xdev->dev, NULL);
+ pdev->xdev = NULL;
+
+ kfree(pdev);
+}
+
+/*
+ * Map the frontend's shared page (@gnt_ref) and bind its event channel
+ * (@remote_evtchn) to xen_pcibk_handle_event as a late-EOI IRQ.
+ *
+ * Returns 0 on success or the negative error of the failing step, which is
+ * also reported via xenbus_dev_fatal(). If binding fails the page stays
+ * mapped in pdev->sh_info.
+ */
+static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
+			       int remote_evtchn)
+{
+	void *ring;
+	int rc;
+
+	dev_dbg(&pdev->xdev->dev,
+		"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
+		gnt_ref, remote_evtchn);
+
+	rc = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &ring);
+	if (rc < 0) {
+		xenbus_dev_fatal(pdev->xdev, rc,
+				 "Error mapping other domain page in ours.");
+		return rc;
+	}
+
+	pdev->sh_info = ring;
+
+	rc = bind_interdomain_evtchn_to_irqhandler_lateeoi(
+		pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
+		0, DRV_NAME, pdev);
+	if (rc < 0) {
+		xenbus_dev_fatal(pdev->xdev, rc,
+				 "Error binding event channel to IRQ");
+		return rc;
+	}
+
+	/* A non-negative result is the IRQ number that was bound. */
+	pdev->evtchn_irq = rc;
+
+	dev_dbg(&pdev->xdev->dev, "Attached!\n");
+
+	return 0;
+}
+
+/*
+ * Connect to the frontend once both ends are in XenbusStateInitialised:
+ * read the grant reference, event channel and protocol magic the frontend
+ * published, attach to those resources and switch to XenbusStateConnected.
+ *
+ * Returns 0 if nothing needed doing or on success, otherwise a negative
+ * error (also reported through xenbus_dev_fatal()).
+ */
+static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
+{
+ int err = 0;
+ int gnt_ref, remote_evtchn;
+ char *magic = NULL;
+
+
+ mutex_lock(&pdev->dev_lock);
+ /* Make sure we only do this setup once */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ XenbusStateInitialised)
+ goto out;
+
+ /* Wait for frontend to state that it has published the configuration */
+ if (xenbus_read_driver_state(pdev->xdev->otherend) !=
+ XenbusStateInitialised)
+ goto out;
+
+ dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
+
+ /* magic is handed back as a string that is kfree()d at the end. */
+ err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
+ "pci-op-ref", "%u", &gnt_ref,
+ "event-channel", "%u", &remote_evtchn,
+ "magic", NULL, &magic, NULL);
+ if (err) {
+ /* The configuration could not be read: treated as fatal */
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading configuration from frontend");
+ goto out;
+ }
+
+ /* Both ends must agree on the protocol version string. */
+ if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
+ xenbus_dev_fatal(pdev->xdev, -EFAULT,
+ "version mismatch (%s/%s) with pcifront - "
+ "halting " DRV_NAME,
+ magic, XEN_PCI_MAGIC);
+ err = -EFAULT;
+ goto out;
+ }
+
+ err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn);
+ if (err)
+ goto out;
+
+ dev_dbg(&pdev->xdev->dev, "Connecting...\n");
+
+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+ if (err)
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error switching to connected state!");
+
+ dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
+out:
+ mutex_unlock(&pdev->dev_lock);
+
+ kfree(magic);
+
+ return err;
+}
+
+/*
+ * Write the virtual address of exported device @devid to the backend's
+ * xenstore node "vdev-<devid>" as dddd:bb:ss.ff.
+ *
+ * Returns the xenbus_printf() result, or -ENOMEM if the key name does not
+ * fit the local buffer.
+ */
+static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev,
+				     unsigned int domain, unsigned int bus,
+				     unsigned int devfn, unsigned int devid)
+{
+	char key[64];
+	int n;
+
+	n = snprintf(key, sizeof(key), "vdev-%d", devid);
+	if (unlikely(n >= (sizeof(key) - 1)))
+		return -ENOMEM;
+
+	/* Note: The PV protocol uses %02x, don't change it */
+	return xenbus_printf(XBT_NIL, pdev->xdev->nodename, key,
+			     "%04x:%02x:%02x.%02x", domain, bus,
+			     PCI_SLOT(devfn), PCI_FUNC(devfn));
+}
+
+/*
+ * Export the physical device domain:bus:slot.func to the frontend as device
+ * number @devid: claim it from pcistub, add it to the backend's device list
+ * (publishing its address) and register the guest as its owning domain,
+ * taking ownership over from a previous owner if necessary.
+ *
+ * Returns 0 on success, -EINVAL if the device cannot be obtained, or the
+ * error from xen_pcibk_add_pci_dev().
+ */
+static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
+				   int domain, int bus, int slot, int func,
+				   int devid)
+{
+	struct pci_dev *dev;
+	int rc;
+
+	dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
+		domain, bus, slot, func);
+
+	dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
+	if (dev == NULL) {
+		xenbus_dev_fatal(pdev->xdev, -EINVAL,
+				 "Couldn't locate PCI device "
+				 "(%04x:%02x:%02x.%d)! "
+				 "perhaps already in-use?",
+				 domain, bus, slot, func);
+		return -EINVAL;
+	}
+
+	rc = xen_pcibk_add_pci_dev(pdev, dev, devid,
+				   xen_pcibk_publish_pci_dev);
+	if (rc != 0)
+		return rc;
+
+	dev_info(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
+	if (xen_register_device_domain_owner(dev,
+					     pdev->xdev->otherend_id) != 0) {
+		/* Someone else is recorded as owner: replace them. */
+		dev_err(&dev->dev, "Stealing ownership from dom%d.\n",
+			xen_find_device_domain_owner(dev));
+		xen_unregister_device_domain_owner(dev);
+		xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
+	}
+
+	/* TODO: It'd be nice to export a bridge and have all of its children
+	 * get exported with it. This may be best done in xend (which will
+	 * have to calculate resource usage anyway) but we probably want to
+	 * put something in here to ensure that if a bridge gets given to a
+	 * driver domain, that all devices under that bridge are not given
+	 * to other driver domains (as he who controls the bridge can disable
+	 * it and stop the other devices from working).
+	 */
+	return 0;
+}
+
+/*
+ * Withdraw the device the frontend sees at domain:bus:slot.func: drop the
+ * guest's ownership registration and release the device from the backend.
+ *
+ * Returns 0 on success or -EINVAL if no such device is exported to this
+ * frontend.
+ */
+static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
+				   int domain, int bus, int slot, int func)
+{
+	struct pci_dev *dev;
+
+	dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
+		domain, bus, slot, func);
+
+	dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
+	if (dev == NULL) {
+		dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
+			"(%04x:%02x:%02x.%d)! not owned by this domain\n",
+			domain, bus, slot, func);
+		return -EINVAL;
+	}
+
+	dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
+	xen_unregister_device_domain_owner(dev);
+
+	/* N.B. This ends up calling pcistub_put_pci_dev which ends up
+	 * doing the FLR. */
+	xen_pcibk_release_pci_dev(pdev, dev, true /* use the lock. */);
+
+	return 0;
+}
+
+/*
+ * Publish PCI root @domain:@bus in the backend's xenstore directory as
+ * "root-<n>" and bump "root_num", unless an identical root is already
+ * listed.
+ *
+ * Returns 0 on success or if the root was already published, -ENOMEM if a
+ * key name does not fit, -EINVAL if an existing root entry cannot be parsed,
+ * or the xenbus error otherwise.
+ */
+static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev,
+ unsigned int domain, unsigned int bus)
+{
+ unsigned int d, b;
+ int i, root_num, len, err;
+ char str[64];
+
+ dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
+
+ /* A missing or unparsable root_num means no roots published yet. */
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ "root_num", "%d", &root_num);
+ if (err == 0 || err == -ENOENT)
+ root_num = 0;
+ else if (err < 0)
+ goto out;
+
+ /* Verify that we haven't already published this pci root */
+ for (i = 0; i < root_num; i++) {
+ len = snprintf(str, sizeof(str), "root-%d", i);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ str, "%x:%x", &d, &b);
+ if (err < 0)
+ goto out;
+ /* Both fields (domain and bus) must have been converted. */
+ if (err != 2) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (d == domain && b == bus) {
+ err = 0;
+ goto out;
+ }
+ }
+
+ /* Not found: append it as the next root entry. */
+ len = snprintf(str, sizeof(str), "root-%d", root_num);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
+ root_num, domain, bus);
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+ "%04x:%02x", domain, bus);
+ if (err)
+ goto out;
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
+ "root_num", "%d", (root_num + 1));
+
+out:
+ return err;
+}
+
+/*
+ * Process pending per-device hot-plug requests found in the backend's
+ * xenstore directory.
+ *
+ * Runs only if the backend is currently in @state. For each of the
+ * "num_devs" devices the "state-<i>" node is read:
+ *   - XenbusStateInitialising: export the device named by "dev-<i>", publish
+ *     the PCI roots and move the substate to XenbusStateInitialised;
+ *   - XenbusStateClosing: remove the device named by "vdev-<i>";
+ *   - anything else: ignored.
+ * If @state is XenbusStateReconfiguring the backend is finally switched to
+ * XenbusStateReconfigured.
+ *
+ * Most failures are reported through xenbus_dev_fatal(); the function itself
+ * always returns 0, so callers cannot observe errors via the return value.
+ */
+static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev,
+				 enum xenbus_state state)
+{
+	int err = 0;
+	int num_devs;
+	int domain, bus, slot, func;
+	unsigned int substate;
+	int i, len;
+	char state_str[64];
+	char dev_str[64];
+
+
+	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
+
+	mutex_lock(&pdev->dev_lock);
+	if (xenbus_read_driver_state(pdev->xdev->nodename) != state)
+		goto out;
+
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
+			   &num_devs);
+	if (err != 1) {
+		if (err >= 0)
+			err = -EINVAL;
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error reading number of devices");
+		goto out;
+	}
+
+	for (i = 0; i < num_devs; i++) {
+		len = snprintf(state_str, sizeof(state_str), "state-%d", i);
+		if (unlikely(len >= (sizeof(state_str) - 1))) {
+			err = -ENOMEM;
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "String overflow while reading "
+					 "configuration");
+			goto out;
+		}
+		/* An unreadable substate counts as XenbusStateUnknown. */
+		substate = xenbus_read_unsigned(pdev->xdev->nodename, state_str,
+						XenbusStateUnknown);
+
+		switch (substate) {
+		case XenbusStateInitialising:
+			dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
+
+			len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
+			if (unlikely(len >= (sizeof(dev_str) - 1))) {
+				err = -ENOMEM;
+				xenbus_dev_fatal(pdev->xdev, err,
+						 "String overflow while "
+						 "reading configuration");
+				goto out;
+			}
+			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+					   dev_str, "%x:%x:%x.%x",
+					   &domain, &bus, &slot, &func);
+			if (err < 0) {
+				xenbus_dev_fatal(pdev->xdev, err,
+						 "Error reading device "
+						 "configuration");
+				goto out;
+			}
+			if (err != 4) {
+				err = -EINVAL;
+				xenbus_dev_fatal(pdev->xdev, err,
+						 "Error parsing pci device "
+						 "configuration");
+				goto out;
+			}
+
+			err = xen_pcibk_export_device(pdev, domain, bus, slot,
+						      func, i);
+			if (err)
+				goto out;
+
+			/* Publish pci roots. */
+			err = xen_pcibk_publish_pci_roots(pdev,
+						xen_pcibk_publish_pci_root);
+			if (err) {
+				/*
+				 * The two literals are concatenated; the
+				 * trailing space keeps "root buses" apart.
+				 */
+				xenbus_dev_fatal(pdev->xdev, err,
+						 "Error while publish PCI root "
+						 "buses for frontend");
+				goto out;
+			}
+
+			err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
+					    state_str, "%d",
+					    XenbusStateInitialised);
+			if (err) {
+				xenbus_dev_fatal(pdev->xdev, err,
+						 "Error switching substate of "
+						 "dev-%d\n", i);
+				goto out;
+			}
+			break;
+
+		case XenbusStateClosing:
+			dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
+
+			/* Removal is keyed by the published virtual address. */
+			len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
+			if (unlikely(len >= (sizeof(dev_str) - 1))) {
+				err = -ENOMEM;
+				xenbus_dev_fatal(pdev->xdev, err,
+						 "String overflow while "
+						 "reading configuration");
+				goto out;
+			}
+			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+					   dev_str, "%x:%x:%x.%x",
+					   &domain, &bus, &slot, &func);
+			if (err < 0) {
+				xenbus_dev_fatal(pdev->xdev, err,
+						 "Error reading device "
+						 "configuration");
+				goto out;
+			}
+			if (err != 4) {
+				err = -EINVAL;
+				xenbus_dev_fatal(pdev->xdev, err,
+						 "Error parsing pci device "
+						 "configuration");
+				goto out;
+			}
+
+			err = xen_pcibk_remove_device(pdev, domain, bus, slot,
+						      func);
+			if (err)
+				goto out;
+
+			/* TODO: If at some point we implement support for pci
+			 * root hot-remove on pcifront side, we'll need to
+			 * remove unnecessary xenstore nodes of pci roots here.
+			 */
+
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	if (state != XenbusStateReconfiguring)
+		/* Make sure we only reconfigure once. */
+		goto out;
+
+	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
+	if (err) {
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error switching to reconfigured state!");
+		goto out;
+	}
+
+out:
+	mutex_unlock(&pdev->dev_lock);
+	return 0;
+}
+
+/*
+ * Xenbus callback invoked when the frontend changes state (installed as
+ * .otherend_changed in xen_pcibk_driver).
+ *
+ * @xdev:     the backend xenbus device; its drvdata is the xen_pcibk_device.
+ * @fe_state: the state the frontend has moved to.
+ *
+ * The backend reacts per state: attach on Initialised, reconfigure on
+ * Reconfiguring, follow the frontend to Connected, and disconnect and
+ * follow it on Closing/Closed. The device is unregistered when the
+ * frontend state is Unknown, or when it is Closed and the device is no
+ * longer online.
+ */
+static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
+				       enum xenbus_state fe_state)
+{
+	struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev);
+	int frontend_gone = 0;
+
+	dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
+
+	switch (fe_state) {
+	case XenbusStateInitialised:
+		xen_pcibk_attach(pdev);
+		break;
+
+	case XenbusStateReconfiguring:
+		xen_pcibk_reconfigure(pdev, XenbusStateReconfiguring);
+		break;
+
+	case XenbusStateConnected:
+		/* pcifront went from reconfiguring back to connected;
+		 * follow it.
+		 */
+		xenbus_switch_state(xdev, XenbusStateConnected);
+		break;
+
+	case XenbusStateClosing:
+	case XenbusStateClosed:
+		/* Drop the connection and mirror the frontend's state. */
+		xen_pcibk_disconnect(pdev);
+		xenbus_switch_state(xdev, fe_state);
+		/* Only a fully closed frontend can lead to unregistering,
+		 * and only if the device is not online any more.
+		 */
+		if (fe_state == XenbusStateClosed)
+			frontend_gone = !xenbus_dev_is_online(xdev);
+		break;
+
+	case XenbusStateUnknown:
+		frontend_gone = 1;
+		break;
+
+	default:
+		break;
+	}
+
+	if (frontend_gone) {
+		dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
+		device_unregister(&xdev->dev);
+	}
+}
+
+/*
+ * Read the backend configuration from xenstore (if it is available yet)
+ * and export every PCI device listed there.
+ *
+ * With pdev->dev_lock held, and only while the backend node is still in
+ * XenbusStateInitWait, this:
+ *   - reads "num_devs";
+ *   - for each index i, parses "dev-<i>" as domain:bus:slot.func, exports
+ *     that device and writes XenbusStateInitialised to "state-<i>";
+ *   - publishes the PCI roots and switches the backend to
+ *     XenbusStateInitialised.
+ *
+ * Returns the error code of the first failing step. When nothing failed
+ * (including the case where the backend was not in InitWait and nothing
+ * was done) xen_pcibk_attach() is called after dropping the lock and 0 is
+ * returned.
+ */
+static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
+{
+	char dev_key[64];
+	char state_key[64];
+	int domain, bus, slot, func;
+	int ndevs, idx, len;
+	int err = 0;
+
+	mutex_lock(&pdev->dev_lock);
+
+	/* Setup can be requested twice; act only if we are still waiting. */
+	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+	    XenbusStateInitWait)
+		goto out;
+
+	dev_dbg(&pdev->xdev->dev, "getting be setup\n");
+
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
+			   &ndevs);
+	if (err != 1) {
+		/* A non-negative count other than 1 means a bad value. */
+		err = (err < 0) ? err : -EINVAL;
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error reading number of devices");
+		goto out;
+	}
+
+	for (idx = 0; idx < ndevs; idx++) {
+		len = snprintf(dev_key, sizeof(dev_key), "dev-%d", idx);
+		if (unlikely(len >= (sizeof(dev_key) - 1))) {
+			err = -ENOMEM;
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "String overflow while reading configuration");
+			goto out;
+		}
+
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_key,
+				   "%x:%x:%x.%x",
+				   &domain, &bus, &slot, &func);
+		if (err != 4) {
+			if (err < 0) {
+				xenbus_dev_fatal(pdev->xdev, err,
+						 "Error reading device configuration");
+			} else {
+				/* Entry was read but did not have 4 fields. */
+				err = -EINVAL;
+				xenbus_dev_fatal(pdev->xdev, err,
+						 "Error parsing pci device configuration");
+			}
+			goto out;
+		}
+
+		err = xen_pcibk_export_device(pdev, domain, bus, slot, func,
+					      idx);
+		if (err)
+			goto out;
+
+		/* Mark this device's substate as initialised. */
+		len = snprintf(state_key, sizeof(state_key), "state-%d", idx);
+		if (unlikely(len >= (sizeof(state_key) - 1))) {
+			err = -ENOMEM;
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "String overflow while reading configuration");
+			goto out;
+		}
+
+		err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_key,
+				    "%d", XenbusStateInitialised);
+		if (err) {
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error switching substate of dev-%d\n",
+					 idx);
+			goto out;
+		}
+	}
+
+	err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root);
+	if (err) {
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error while publish PCI root buses for frontend");
+		goto out;
+	}
+
+	err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
+	if (err)
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error switching to initialised state!");
+
+out:
+	mutex_unlock(&pdev->dev_lock);
+	if (err)
+		return err;
+
+	/* see if pcifront is already configured (if not, we'll wait) */
+	xen_pcibk_attach(pdev);
+	return 0;
+}
+
+/*
+ * Watch callback for the backend's own xenstore node.
+ *
+ * Looks up the owning xen_pcibk_device from the embedded be_watch and
+ * dispatches on the backend's current driver state. @path and @token are
+ * not used.
+ */
+static void xen_pcibk_be_watch(struct xenbus_watch *watch,
+			       const char *path, const char *token)
+{
+	struct xen_pcibk_device *pdev;
+	enum xenbus_state be_state;
+
+	pdev = container_of(watch, struct xen_pcibk_device, be_watch);
+	be_state = xenbus_read_driver_state(pdev->xdev->nodename);
+
+	if (be_state == XenbusStateInitWait) {
+		/* Still waiting for configuration: try the initial setup. */
+		xen_pcibk_setup_backend(pdev);
+	} else if (be_state == XenbusStateInitialised) {
+		/*
+		 * We typically reach Initialised once the first device has
+		 * been added, so devices added afterwards may need a
+		 * reconfigure pass.
+		 */
+		xen_pcibk_reconfigure(pdev, XenbusStateInitialised);
+	}
+}
+
+/*
+ * Xenbus probe callback (installed as .probe in xen_pcibk_driver).
+ *
+ * Allocates the per-device xen_pcibk_device, moves the backend to
+ * XenbusStateInitWait, registers a watch on the backend's own xenstore
+ * node and then invokes the watch handler once by hand so that
+ * configuration written before the watch existed is not missed.
+ *
+ * @dev: the xenbus device being probed.
+ * @id:  matching device id entry (unused here).
+ *
+ * Returns 0 on success, -ENOMEM if the device structure cannot be
+ * allocated, or the error returned by xenbus_switch_state() /
+ * xenbus_watch_path().
+ */
+static int xen_pcibk_xenbus_probe(struct xenbus_device *dev,
+				  const struct xenbus_device_id *id)
+{
+	int err = 0;
+	struct xen_pcibk_device *pdev = alloc_pdev(dev);
+
+	if (pdev == NULL) {
+		err = -ENOMEM;
+		xenbus_dev_fatal(dev, err,
+				 "Error allocating xen_pcibk_device struct");
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): the two failure paths below return without releasing
+	 * pdev. Confirm whether another path frees it or whether this leaks.
+	 */
+
+	/* wait for xend to configure us */
+	err = xenbus_switch_state(dev, XenbusStateInitWait);
+	if (err)
+		goto out;
+
+	/* watch the backend node for backend configuration information */
+	err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
+				NULL, xen_pcibk_be_watch);
+	if (err)
+		goto out;
+
+	pdev->be_watching = 1;
+
+	/* We need to force a call to our callback here in case
+	 * xend already configured us! Path and token are unused by the
+	 * handler, so pass NULL for both pointers.
+	 */
+	xen_pcibk_be_watch(&pdev->be_watch, NULL, NULL);
+
+out:
+	return err;
+}
+
+/*
+ * Xenbus remove callback (installed as .remove in xen_pcibk_driver).
+ *
+ * Frees the xen_pcibk_device stored as the device's drvdata, if there is
+ * one. Always returns 0.
+ */
+static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
+{
+	struct xen_pcibk_device *pdev;
+
+	pdev = dev_get_drvdata(&dev->dev);
+	if (pdev)
+		free_pdev(pdev);
+
+	return 0;
+}
+
+/*
+ * Xenbus device id table referenced by xen_pcibk_driver.ids: this driver
+ * handles "pci" devices. The trailing empty-string entry is presumably the
+ * end-of-table marker expected by the xenbus core (confirm against the
+ * xenbus matching code).
+ */
+static const struct xenbus_device_id xen_pcibk_ids[] = {
+ {"pci"},
+ {""},
+};
+
+/*
+ * Xenbus driver description for the PCI backend. It ties the id table to
+ * the probe/remove callbacks and routes frontend state changes to
+ * xen_pcibk_frontend_changed(). Registered and unregistered by
+ * xen_pcibk_xenbus_register() / xen_pcibk_xenbus_unregister().
+ */
+static struct xenbus_driver xen_pcibk_driver = {
+ .name = DRV_NAME,
+ .ids = xen_pcibk_ids,
+ .probe = xen_pcibk_xenbus_probe,
+ .remove = xen_pcibk_xenbus_remove,
+ .otherend_changed = xen_pcibk_frontend_changed,
+};
+
+/*
+ * Backend implementation in use. Assigned in xen_pcibk_xenbus_register():
+ * the vpci backend by default, the passthrough backend when the
+ * 'passthrough' flag is set.
+ */
+const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
+
+/*
+ * Select the backend implementation and register the xenbus backend
+ * driver.
+ *
+ * The passthrough backend is chosen when the 'passthrough' flag is set,
+ * otherwise the vpci backend is used. The chosen backend's name is
+ * logged. Returns the result of xenbus_register_backend().
+ */
+int __init xen_pcibk_xenbus_register(void)
+{
+	if (passthrough)
+		xen_pcibk_backend = &xen_pcibk_passthrough_backend;
+	else
+		xen_pcibk_backend = &xen_pcibk_vpci_backend;
+
+	pr_info("backend is %s\n", xen_pcibk_backend->name);
+
+	return xenbus_register_backend(&xen_pcibk_driver);
+}
+
+/*
+ * Module-exit counterpart of xen_pcibk_xenbus_register(): unregisters
+ * xen_pcibk_driver from xenbus.
+ */
+void __exit xen_pcibk_xenbus_unregister(void)
+{
+ xenbus_unregister_driver(&xen_pcibk_driver);
+}