diff options
Diffstat (limited to 'drivers/xen/xenbus')
-rw-r--r-- | drivers/xen/xenbus/Makefile | 15 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus.h | 139 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_client.c | 949 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_comms.c | 484 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_dev_backend.c | 134 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_dev_frontend.c | 725 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 1072 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_backend.c | 313 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_frontend.c | 516 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_xs.c | 956 |
10 files changed, 5303 insertions, 0 deletions
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile new file mode 100644 index 000000000..0c7532110 --- /dev/null +++ b/drivers/xen/xenbus/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-y += xenbus.o +obj-y += xenbus_dev_frontend.o + +xenbus-objs = +xenbus-objs += xenbus_client.o +xenbus-objs += xenbus_comms.o +xenbus-objs += xenbus_xs.o +xenbus-objs += xenbus_probe.o + +xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o +xenbus-objs += $(xenbus-be-objs-y) + +obj-$(CONFIG_XEN_BACKEND) += xenbus_dev_backend.o +obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h new file mode 100644 index 000000000..2754bdfad --- /dev/null +++ b/drivers/xen/xenbus/xenbus.h @@ -0,0 +1,139 @@ +/* + * Private include for xenbus communications. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 XenSource Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XENBUS_XENBUS_H +#define _XENBUS_XENBUS_H + +#include <linux/mutex.h> +#include <linux/uio.h> +#include <xen/xenbus.h> + +#define XEN_BUS_ID_SIZE 20 + +struct xen_bus_type { + char *root; + unsigned int levels; + int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); + int (*probe)(struct xen_bus_type *bus, const char *type, + const char *dir); + bool (*otherend_will_handle)(struct xenbus_watch *watch, + const char *path, const char *token); + void (*otherend_changed)(struct xenbus_watch *watch, const char *path, + const char *token); + struct bus_type bus; +}; + +enum xenstore_init { + XS_UNKNOWN, + XS_PV, + XS_HVM, + XS_LOCAL, +}; + +struct xs_watch_event { + struct list_head list; + unsigned int len; + struct xenbus_watch *handle; + const char *path; + const char *token; + char body[]; +}; + +enum xb_req_state { + xb_req_state_queued, + xb_req_state_wait_reply, + xb_req_state_got_reply, + xb_req_state_aborted +}; + +struct xb_req_data { + struct list_head list; + wait_queue_head_t wq; + struct xsd_sockmsg msg; + uint32_t caller_req_id; + enum xsd_sockmsg_type type; + char *body; + const struct kvec *vec; + int num_vecs; + int err; + enum xb_req_state state; + bool user_req; + void (*cb)(struct xb_req_data *); + void *par; +}; + +extern enum xenstore_init xen_store_domain_type; +extern const struct attribute_group *xenbus_dev_groups[]; +extern struct mutex xs_response_mutex; +extern struct list_head xs_reply_list; +extern struct list_head xb_write_list; +extern wait_queue_head_t xb_waitq; +extern struct mutex xb_write_mutex; + +int xs_init(void); +int xb_init_comms(void); +void xb_deinit_comms(void); +int xs_watch_msg(struct xs_watch_event *event); +void xs_request_exit(struct xb_req_data *req); + +int xenbus_match(struct device *_dev, struct device_driver *_drv); +int xenbus_dev_probe(struct device *_dev); +void xenbus_dev_remove(struct device *_dev); +int xenbus_register_driver_common(struct xenbus_driver *drv, + struct xen_bus_type *bus, + struct module *owner, + const char *mod_name); +int xenbus_probe_node(struct xen_bus_type *bus, + const char *type, + const char *nodename); +int xenbus_probe_devices(struct xen_bus_type *bus); + +void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); + +int xenbus_dev_suspend(struct device *dev); +int xenbus_dev_resume(struct device *dev); +int xenbus_dev_cancel(struct device *dev); + +void xenbus_otherend_changed(struct xenbus_watch *watch, + const char *path, const char *token, + int ignore_on_shutdown); + +int xenbus_read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node); + +void xenbus_ring_ops_init(void); + +int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par); +void xenbus_dev_queue_reply(struct xb_req_data *req); + +extern unsigned int xb_dev_generation_id; + +#endif diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c new file mode 100644 index 000000000..d4b251925 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_client.c @@ -0,0 +1,949 @@ +/****************************************************************************** + * Client-facing interface for the Xenbus driver. In other words, the + * interface between the Xenbus and the device-specific code, be it the + * frontend or the backend of that driver. + * + * Copyright (C) 2005 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/vmalloc.h> +#include <linux/export.h> +#include <asm/xen/hypervisor.h> +#include <xen/page.h> +#include <xen/interface/xen.h> +#include <xen/interface/event_channel.h> +#include <xen/balloon.h> +#include <xen/events.h> +#include <xen/grant_table.h> +#include <xen/xenbus.h> +#include <xen/xen.h> +#include <xen/features.h> + +#include "xenbus.h" + +#define XENBUS_PAGES(_grants) (DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE)) + +#define XENBUS_MAX_RING_PAGES (XENBUS_PAGES(XENBUS_MAX_RING_GRANTS)) + +struct xenbus_map_node { + struct list_head next; + union { + struct { + struct vm_struct *area; + } pv; + struct { + struct page *pages[XENBUS_MAX_RING_PAGES]; + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; + void *addr; + } hvm; + }; + grant_handle_t handles[XENBUS_MAX_RING_GRANTS]; + unsigned int nr_handles; +}; + +struct map_ring_valloc { + struct xenbus_map_node *node; + + /* Why do we need two arrays? See comment of __xenbus_map_ring */ + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; + + struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS]; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; + + unsigned int idx; +}; + +static DEFINE_SPINLOCK(xenbus_valloc_lock); +static LIST_HEAD(xenbus_valloc_pages); + +struct xenbus_ring_ops { + int (*map)(struct xenbus_device *dev, struct map_ring_valloc *info, + grant_ref_t *gnt_refs, unsigned int nr_grefs, + void **vaddr); + int (*unmap)(struct xenbus_device *dev, void *vaddr); +}; + +static const struct xenbus_ring_ops *ring_ops __read_mostly; + +const char *xenbus_strstate(enum xenbus_state state) +{ + static const char *const name[] = { + [ XenbusStateUnknown ] = "Unknown", + [ XenbusStateInitialising ] = "Initialising", + [ XenbusStateInitWait ] = "InitWait", + [ XenbusStateInitialised ] = "Initialised", + [ XenbusStateConnected ] = "Connected", + [ XenbusStateClosing ] = "Closing", + [ XenbusStateClosed ] = "Closed", + [XenbusStateReconfiguring] = "Reconfiguring", + [XenbusStateReconfigured] = "Reconfigured", + }; + return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; +} +EXPORT_SYMBOL_GPL(xenbus_strstate); + +/** + * xenbus_watch_path - register a watch + * @dev: xenbus device + * @path: path to watch + * @watch: watch to register + * @callback: callback to register + * + * Register a @watch on the given path, using the given xenbus_watch structure + * for storage, and the given @callback function as the callback. Return 0 on + * success, or -errno on error. On success, the given @path will be saved as + * @watch->node, and remains the caller's to free. On error, @watch->node will + * be NULL, the device will switch to %XenbusStateClosing, and the error will + * be saved in the store. + */ +int xenbus_watch_path(struct xenbus_device *dev, const char *path, + struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), + void (*callback)(struct xenbus_watch *, + const char *, const char *)) +{ + int err; + + watch->node = path; + watch->will_handle = will_handle; + watch->callback = callback; + + err = register_xenbus_watch(watch); + + if (err) { + watch->node = NULL; + watch->will_handle = NULL; + watch->callback = NULL; + xenbus_dev_fatal(dev, err, "adding watch on %s", path); + } + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_watch_path); + + +/** + * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path + * @dev: xenbus device + * @watch: watch to register + * @callback: callback to register + * @pathfmt: format of path to watch + * + * Register a watch on the given @path, using the given xenbus_watch + * structure for storage, and the given @callback function as the callback. + * Return 0 on success, or -errno on error. On success, the watched path + * (@path/@path2) will be saved as @watch->node, and becomes the caller's to + * kfree(). On error, watch->node will be NULL, so the caller has nothing to + * free, the device will switch to %XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_watch_pathfmt(struct xenbus_device *dev, + struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), + void (*callback)(struct xenbus_watch *, + const char *, const char *), + const char *pathfmt, ...) +{ + int err; + va_list ap; + char *path; + + va_start(ap, pathfmt); + path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap); + va_end(ap); + + if (!path) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); + return -ENOMEM; + } + err = xenbus_watch_path(dev, path, watch, will_handle, callback); + + if (err) + kfree(path); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); + +static void xenbus_switch_fatal(struct xenbus_device *, int, int, + const char *, ...); + +static int +__xenbus_switch_state(struct xenbus_device *dev, + enum xenbus_state state, int depth) +{ + /* We check whether the state is currently set to the given value, and + if not, then the state is set. We don't want to unconditionally + write the given state, because we don't want to fire watches + unnecessarily. Furthermore, if the node has gone, we don't write + to it, as the device will be tearing down, and we don't want to + resurrect that directory. + + Note that, because of this cached value of our state, this + function will not take a caller's Xenstore transaction + (something it was trying to in the past) because dev->state + would not get reset if the transaction was aborted. + */ + + struct xenbus_transaction xbt; + int current_state; + int err, abort; + + if (state == dev->state) + return 0; + +again: + abort = 1; + + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_switch_fatal(dev, depth, err, "starting transaction"); + return 0; + } + + err = xenbus_scanf(xbt, dev->nodename, "state", "%d", ¤t_state); + if (err != 1) + goto abort; + + err = xenbus_printf(xbt, dev->nodename, "state", "%d", state); + if (err) { + xenbus_switch_fatal(dev, depth, err, "writing new state"); + goto abort; + } + + abort = 0; +abort: + err = xenbus_transaction_end(xbt, abort); + if (err) { + if (err == -EAGAIN && !abort) + goto again; + xenbus_switch_fatal(dev, depth, err, "ending transaction"); + } else + dev->state = state; + + return 0; +} + +/** + * xenbus_switch_state + * @dev: xenbus device + * @state: new state + * + * Advertise in the store a change of the given driver to the given new_state. + * Return 0 on success, or -errno on error. On error, the device will switch + * to XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) +{ + return __xenbus_switch_state(dev, state, 0); +} + +EXPORT_SYMBOL_GPL(xenbus_switch_state); + +int xenbus_frontend_closed(struct xenbus_device *dev) +{ + xenbus_switch_state(dev, XenbusStateClosed); + complete(&dev->down); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_frontend_closed); + +static void xenbus_va_dev_error(struct xenbus_device *dev, int err, + const char *fmt, va_list ap) +{ + unsigned int len; + char *printf_buffer; + char *path_buffer; + +#define PRINTF_BUFFER_SIZE 4096 + + printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); + if (!printf_buffer) + return; + + len = sprintf(printf_buffer, "%i ", -err); + vsnprintf(printf_buffer + len, PRINTF_BUFFER_SIZE - len, fmt, ap); + + dev_err(&dev->dev, "%s\n", printf_buffer); + + path_buffer = kasprintf(GFP_KERNEL, "error/%s", dev->nodename); + if (path_buffer) + xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer); + + kfree(printf_buffer); + kfree(path_buffer); +} + +/** + * xenbus_dev_error + * @dev: xenbus device + * @err: error to report + * @fmt: error message format + * + * Report the given negative errno into the store, along with the given + * formatted message. + */ +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xenbus_va_dev_error(dev, err, fmt, ap); + va_end(ap); +} +EXPORT_SYMBOL_GPL(xenbus_dev_error); + +/** + * xenbus_dev_fatal + * @dev: xenbus device + * @err: error to report + * @fmt: error message format + * + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by + * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly + * closedown of this driver and its peer. + */ + +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xenbus_va_dev_error(dev, err, fmt, ap); + va_end(ap); + + xenbus_switch_state(dev, XenbusStateClosing); +} +EXPORT_SYMBOL_GPL(xenbus_dev_fatal); + +/** + * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps + * avoiding recursion within xenbus_switch_state. + */ +static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xenbus_va_dev_error(dev, err, fmt, ap); + va_end(ap); + + if (!depth) + __xenbus_switch_state(dev, XenbusStateClosing, 1); +} + +/* + * xenbus_setup_ring + * @dev: xenbus device + * @vaddr: pointer to starting virtual address of the ring + * @nr_pages: number of pages to be granted + * @grefs: grant reference array to be filled in + * + * Allocate physically contiguous pages for a shared ring buffer and grant it + * to the peer of the given device. The ring buffer is initially filled with + * zeroes. The virtual address of the ring is stored at @vaddr and the + * grant references are stored in the @grefs array. In case of error @vaddr + * will be set to NULL and @grefs will be filled with INVALID_GRANT_REF. + */ +int xenbus_setup_ring(struct xenbus_device *dev, gfp_t gfp, void **vaddr, + unsigned int nr_pages, grant_ref_t *grefs) +{ + unsigned long ring_size = nr_pages * XEN_PAGE_SIZE; + grant_ref_t gref_head; + unsigned int i; + void *addr; + int ret; + + addr = *vaddr = alloc_pages_exact(ring_size, gfp | __GFP_ZERO); + if (!*vaddr) { + ret = -ENOMEM; + goto err; + } + + ret = gnttab_alloc_grant_references(nr_pages, &gref_head); + if (ret) { + xenbus_dev_fatal(dev, ret, "granting access to %u ring pages", + nr_pages); + goto err; + } + + for (i = 0; i < nr_pages; i++) { + unsigned long gfn; + + if (is_vmalloc_addr(*vaddr)) + gfn = pfn_to_gfn(vmalloc_to_pfn(addr)); + else + gfn = virt_to_gfn(addr); + + grefs[i] = gnttab_claim_grant_reference(&gref_head); + gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id, + gfn, 0); + + addr += XEN_PAGE_SIZE; + } + + return 0; + + err: + if (*vaddr) + free_pages_exact(*vaddr, ring_size); + for (i = 0; i < nr_pages; i++) + grefs[i] = INVALID_GRANT_REF; + *vaddr = NULL; + + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_setup_ring); + +/* + * xenbus_teardown_ring + * @vaddr: starting virtual address of the ring + * @nr_pages: number of pages + * @grefs: grant reference array + * + * Remove grants for the shared ring buffer and free the associated memory. + * On return the grant reference array is filled with INVALID_GRANT_REF. + */ +void xenbus_teardown_ring(void **vaddr, unsigned int nr_pages, + grant_ref_t *grefs) +{ + unsigned int i; + + for (i = 0; i < nr_pages; i++) { + if (grefs[i] != INVALID_GRANT_REF) { + gnttab_end_foreign_access(grefs[i], NULL); + grefs[i] = INVALID_GRANT_REF; + } + } + + if (*vaddr) + free_pages_exact(*vaddr, nr_pages * XEN_PAGE_SIZE); + *vaddr = NULL; +} +EXPORT_SYMBOL_GPL(xenbus_teardown_ring); + +/** + * Allocate an event channel for the given xenbus_device, assigning the newly + * created local port to *port. Return 0 on success, or -errno on error. On + * error, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port) +{ + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = dev->otherend_id; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err) + xenbus_dev_fatal(dev, err, "allocating event channel"); + else + *port = alloc_unbound.port; + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); + + +/** + * Free an existing event channel. Returns 0 on success or -errno on error. + */ +int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port) +{ + struct evtchn_close close; + int err; + + close.port = port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + if (err) + xenbus_dev_error(dev, err, "freeing event channel %u", port); + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_free_evtchn); + + +/** + * xenbus_map_ring_valloc + * @dev: xenbus device + * @gnt_refs: grant reference array + * @nr_grefs: number of grant references + * @vaddr: pointer to address to be filled out by mapping + * + * Map @nr_grefs pages of memory into this domain from another + * domain's grant table. xenbus_map_ring_valloc allocates @nr_grefs + * pages of virtual address space, maps the pages to that address, and + * sets *vaddr to that address. Returns 0 on success, and -errno on + * error. If an error is returned, device will switch to + * XenbusStateClosing and the error message will be saved in XenStore. + */ +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs, + unsigned int nr_grefs, void **vaddr) +{ + int err; + struct map_ring_valloc *info; + + *vaddr = NULL; + + if (nr_grefs > XENBUS_MAX_RING_GRANTS) + return -EINVAL; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->node = kzalloc(sizeof(*info->node), GFP_KERNEL); + if (!info->node) + err = -ENOMEM; + else + err = ring_ops->map(dev, info, gnt_refs, nr_grefs, vaddr); + + kfree(info->node); + kfree(info); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + +/* N.B. sizeof(phys_addr_t) doesn't always equal to sizeof(unsigned + * long), e.g. 32-on-64. Caller is responsible for preparing the + * right array to feed into this function */ +static int __xenbus_map_ring(struct xenbus_device *dev, + grant_ref_t *gnt_refs, + unsigned int nr_grefs, + grant_handle_t *handles, + struct map_ring_valloc *info, + unsigned int flags, + bool *leaked) +{ + int i, j; + + if (nr_grefs > XENBUS_MAX_RING_GRANTS) + return -EINVAL; + + for (i = 0; i < nr_grefs; i++) { + gnttab_set_map_op(&info->map[i], info->phys_addrs[i], flags, + gnt_refs[i], dev->otherend_id); + handles[i] = INVALID_GRANT_HANDLE; + } + + gnttab_batch_map(info->map, i); + + for (i = 0; i < nr_grefs; i++) { + if (info->map[i].status != GNTST_okay) { + xenbus_dev_fatal(dev, info->map[i].status, + "mapping in shared page %d from domain %d", + gnt_refs[i], dev->otherend_id); + goto fail; + } else + handles[i] = info->map[i].handle; + } + + return 0; + + fail: + for (i = j = 0; i < nr_grefs; i++) { + if (handles[i] != INVALID_GRANT_HANDLE) { + gnttab_set_unmap_op(&info->unmap[j], + info->phys_addrs[i], + GNTMAP_host_map, handles[i]); + j++; + } + } + + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j)); + + *leaked = false; + for (i = 0; i < j; i++) { + if (info->unmap[i].status != GNTST_okay) { + *leaked = true; + break; + } + } + + return -ENOENT; +} + +/** + * xenbus_unmap_ring + * @dev: xenbus device + * @handles: grant handle array + * @nr_handles: number of handles in the array + * @vaddrs: addresses to unmap + * + * Unmap memory in this domain that was imported from another domain. + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +static int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles, + unsigned int nr_handles, unsigned long *vaddrs) +{ + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; + int i; + int err; + + if (nr_handles > XENBUS_MAX_RING_GRANTS) + return -EINVAL; + + for (i = 0; i < nr_handles; i++) + gnttab_set_unmap_op(&unmap[i], vaddrs[i], + GNTMAP_host_map, handles[i]); + + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)); + + err = GNTST_okay; + for (i = 0; i < nr_handles; i++) { + if (unmap[i].status != GNTST_okay) { + xenbus_dev_error(dev, unmap[i].status, + "unmapping page at handle %d error %d", + handles[i], unmap[i].status); + err = unmap[i].status; + break; + } + } + + return err; +} + +static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn, + unsigned int goffset, + unsigned int len, + void *data) +{ + struct map_ring_valloc *info = data; + unsigned long vaddr = (unsigned long)gfn_to_virt(gfn); + + info->phys_addrs[info->idx] = vaddr; + info->addrs[info->idx] = vaddr; + + info->idx++; +} + +static int xenbus_map_ring_hvm(struct xenbus_device *dev, + struct map_ring_valloc *info, + grant_ref_t *gnt_ref, + unsigned int nr_grefs, + void **vaddr) +{ + struct xenbus_map_node *node = info->node; + int err; + void *addr; + bool leaked = false; + unsigned int nr_pages = XENBUS_PAGES(nr_grefs); + + err = xen_alloc_unpopulated_pages(nr_pages, node->hvm.pages); + if (err) + goto out_err; + + gnttab_foreach_grant(node->hvm.pages, nr_grefs, + xenbus_map_ring_setup_grant_hvm, + info); + + err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles, + info, GNTMAP_host_map, &leaked); + node->nr_handles = nr_grefs; + + if (err) + goto out_free_ballooned_pages; + + addr = vmap(node->hvm.pages, nr_pages, VM_MAP | VM_IOREMAP, + PAGE_KERNEL); + if (!addr) { + err = -ENOMEM; + goto out_xenbus_unmap_ring; + } + + node->hvm.addr = addr; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); + + *vaddr = addr; + info->node = NULL; + + return 0; + + out_xenbus_unmap_ring: + if (!leaked) + xenbus_unmap_ring(dev, node->handles, nr_grefs, info->addrs); + else + pr_alert("leaking %p size %u page(s)", + addr, nr_pages); + out_free_ballooned_pages: + if (!leaked) + xen_free_unpopulated_pages(nr_pages, node->hvm.pages); + out_err: + return err; +} + +/** + * xenbus_unmap_ring_vfree + * @dev: xenbus device + * @vaddr: addr to unmap + * + * Based on Rusty Russell's skeleton driver's unmap_page. + * Unmap a page of memory in this domain that was imported from another domain. + * Use xenbus_unmap_ring_vfree if you mapped in your memory with + * xenbus_map_ring_valloc (it will free the virtual address space). + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) +{ + return ring_ops->unmap(dev, vaddr); +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); + +#ifdef CONFIG_XEN_PV +static int map_ring_apply(pte_t *pte, unsigned long addr, void *data) +{ + struct map_ring_valloc *info = data; + + info->phys_addrs[info->idx++] = arbitrary_virt_to_machine(pte).maddr; + return 0; +} + +static int xenbus_map_ring_pv(struct xenbus_device *dev, + struct map_ring_valloc *info, + grant_ref_t *gnt_refs, + unsigned int nr_grefs, + void **vaddr) +{ + struct xenbus_map_node *node = info->node; + struct vm_struct *area; + bool leaked = false; + int err = -ENOMEM; + + area = get_vm_area(XEN_PAGE_SIZE * nr_grefs, VM_IOREMAP); + if (!area) + return -ENOMEM; + if (apply_to_page_range(&init_mm, (unsigned long)area->addr, + XEN_PAGE_SIZE * nr_grefs, map_ring_apply, info)) + goto failed; + err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles, + info, GNTMAP_host_map | GNTMAP_contains_pte, + &leaked); + if (err) + goto failed; + + node->nr_handles = nr_grefs; + node->pv.area = area; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); + + *vaddr = area->addr; + info->node = NULL; + + return 0; + +failed: + if (!leaked) + free_vm_area(area); + else + pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs); + + return err; +} + +static int xenbus_unmap_ring_pv(struct xenbus_device *dev, void *vaddr) +{ + struct xenbus_map_node *node; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; + unsigned int level; + int i; + bool leaked = false; + int err; + + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + if (node->pv.area->addr == vaddr) { + list_del(&node->next); + goto found; + } + } + node = NULL; + found: + spin_unlock(&xenbus_valloc_lock); + + if (!node) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + for (i = 0; i < node->nr_handles; i++) { + unsigned long addr; + + memset(&unmap[i], 0, sizeof(unmap[i])); + addr = (unsigned long)vaddr + (XEN_PAGE_SIZE * i); + unmap[i].host_addr = arbitrary_virt_to_machine( + lookup_address(addr, &level)).maddr; + unmap[i].dev_bus_addr = 0; + unmap[i].handle = node->handles[i]; + } + + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)); + + err = GNTST_okay; + leaked = false; + for (i = 0; i < node->nr_handles; i++) { + if (unmap[i].status != GNTST_okay) { + leaked = true; + xenbus_dev_error(dev, unmap[i].status, + "unmapping page at handle %d error %d", + node->handles[i], unmap[i].status); + err = unmap[i].status; + break; + } + } + + if (!leaked) + free_vm_area(node->pv.area); + else + pr_alert("leaking VM area %p size %u page(s)", + node->pv.area, node->nr_handles); + + kfree(node); + return err; +} + +static const struct xenbus_ring_ops ring_ops_pv = { + .map = xenbus_map_ring_pv, + .unmap = xenbus_unmap_ring_pv, +}; +#endif + +struct unmap_ring_hvm +{ + unsigned int idx; + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; +}; + +static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn, + unsigned int goffset, + unsigned int len, + void *data) +{ + struct unmap_ring_hvm *info = data; + + info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn); + + info->idx++; +} + +static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr) +{ + int rv; + struct xenbus_map_node *node; + void *addr; + struct unmap_ring_hvm info = { + .idx = 0, + }; + unsigned int nr_pages; + + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + addr = node->hvm.addr; + if (addr == vaddr) { + list_del(&node->next); + goto found; + } + } + node = addr = NULL; + found: + spin_unlock(&xenbus_valloc_lock); + + if (!node) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + nr_pages = XENBUS_PAGES(node->nr_handles); + + gnttab_foreach_grant(node->hvm.pages, node->nr_handles, + xenbus_unmap_ring_setup_grant_hvm, + &info); + + rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles, + info.addrs); + if (!rv) { + vunmap(vaddr); + xen_free_unpopulated_pages(nr_pages, node->hvm.pages); + } + else + WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages); + + kfree(node); + return rv; +} + +/** + * xenbus_read_driver_state + * @path: path for driver + * + * Return the state of the driver rooted at the given store path, or + * XenbusStateUnknown if no state can be read. + */ +enum xenbus_state xenbus_read_driver_state(const char *path) +{ + enum xenbus_state result; + int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); + if (err) + result = XenbusStateUnknown; + + return result; +} +EXPORT_SYMBOL_GPL(xenbus_read_driver_state); + +static const struct xenbus_ring_ops ring_ops_hvm = { + .map = xenbus_map_ring_hvm, + .unmap = xenbus_unmap_ring_hvm, +}; + +void __init xenbus_ring_ops_init(void) +{ +#ifdef CONFIG_XEN_PV + if (!xen_feature(XENFEAT_auto_translated_physmap)) + ring_ops = &ring_ops_pv; + else +#endif + ring_ops = &ring_ops_hvm; +} diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c new file mode 100644 index 000000000..e5fda0256 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -0,0 +1,484 @@ +/****************************************************************************** + * xenbus_comms.c + * + * Low level code to talks to Xen Store: ringbuffer and event channel. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/err.h> +#include <xen/xenbus.h> +#include <asm/xen/hypervisor.h> +#include <xen/events.h> +#include <xen/page.h> +#include "xenbus.h" + +/* A list of replies. Currently only one will ever be outstanding. */ +LIST_HEAD(xs_reply_list); + +/* A list of write requests. */ +LIST_HEAD(xb_write_list); +DECLARE_WAIT_QUEUE_HEAD(xb_waitq); +DEFINE_MUTEX(xb_write_mutex); + +/* Protect xenbus reader thread against save/restore. */ +DEFINE_MUTEX(xs_response_mutex); + +static int xenbus_irq; +static struct task_struct *xenbus_task; + +static irqreturn_t wake_waiting(int irq, void *unused) +{ + wake_up(&xb_waitq); + return IRQ_HANDLED; +} + +static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) +{ + return ((prod - cons) <= XENSTORE_RING_SIZE); +} + +static void *get_output_chunk(XENSTORE_RING_IDX cons, + XENSTORE_RING_IDX prod, + char *buf, uint32_t *len) +{ + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); + if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) + *len = XENSTORE_RING_SIZE - (prod - cons); + return buf + MASK_XENSTORE_IDX(prod); +} + +static const void *get_input_chunk(XENSTORE_RING_IDX cons, + XENSTORE_RING_IDX prod, + const char *buf, uint32_t *len) +{ + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); + if ((prod - cons) < *len) + *len = prod - cons; + return buf + MASK_XENSTORE_IDX(cons); +} + +static int xb_data_to_write(void) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + + return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE && + !list_empty(&xb_write_list); +} + +/** + * xb_write - low level write + * @data: buffer to send + * @len: length of buffer + * + * Returns number of bytes written or -err. + */ +static int xb_write(const void *data, unsigned int len) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + XENSTORE_RING_IDX cons, prod; + unsigned int bytes = 0; + + while (len != 0) { + void *dst; + unsigned int avail; + + /* Read indexes, then verify. */ + cons = intf->req_cons; + prod = intf->req_prod; + if (!check_indexes(cons, prod)) { + intf->req_cons = intf->req_prod = 0; + return -EIO; + } + if (!xb_data_to_write()) + return bytes; + + /* Must write data /after/ reading the consumer index. */ + virt_mb(); + + dst = get_output_chunk(cons, prod, intf->req, &avail); + if (avail == 0) + continue; + if (avail > len) + avail = len; + + memcpy(dst, data, avail); + data += avail; + len -= avail; + bytes += avail; + + /* Other side must not see new producer until data is there. */ + virt_wmb(); + intf->req_prod += avail; + + /* Implies mb(): other side will see the updated producer. */ + if (prod <= intf->req_cons) + notify_remote_via_evtchn(xen_store_evtchn); + } + + return bytes; +} + +static int xb_data_to_read(void) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + return (intf->rsp_cons != intf->rsp_prod); +} + +static int xb_read(void *data, unsigned int len) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + XENSTORE_RING_IDX cons, prod; + unsigned int bytes = 0; + + while (len != 0) { + unsigned int avail; + const char *src; + + /* Read indexes, then verify. */ + cons = intf->rsp_cons; + prod = intf->rsp_prod; + if (cons == prod) + return bytes; + + if (!check_indexes(cons, prod)) { + intf->rsp_cons = intf->rsp_prod = 0; + return -EIO; + } + + src = get_input_chunk(cons, prod, intf->rsp, &avail); + if (avail == 0) + continue; + if (avail > len) + avail = len; + + /* Must read data /after/ reading the producer index. */ + virt_rmb(); + + memcpy(data, src, avail); + data += avail; + len -= avail; + bytes += avail; + + /* Other side must not see free space until we've copied out */ + virt_mb(); + intf->rsp_cons += avail; + + /* Implies mb(): other side will see the updated consumer. */ + if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE) + notify_remote_via_evtchn(xen_store_evtchn); + } + + return bytes; +} + +static int process_msg(void) +{ + static struct { + struct xsd_sockmsg msg; + char *body; + union { + void *alloc; + struct xs_watch_event *watch; + }; + bool in_msg; + bool in_hdr; + unsigned int read; + } state; + struct xb_req_data *req; + int err; + unsigned int len; + + if (!state.in_msg) { + state.in_msg = true; + state.in_hdr = true; + state.read = 0; + + /* + * We must disallow save/restore while reading a message. + * A partial read across s/r leaves us out of sync with + * xenstored. + * xs_response_mutex is locked as long as we are processing one + * message. state.in_msg will be true as long as we are holding + * the lock here. + */ + mutex_lock(&xs_response_mutex); + + if (!xb_data_to_read()) { + /* We raced with save/restore: pending data 'gone'. */ + mutex_unlock(&xs_response_mutex); + state.in_msg = false; + return 0; + } + } + + if (state.in_hdr) { + if (state.read != sizeof(state.msg)) { + err = xb_read((void *)&state.msg + state.read, + sizeof(state.msg) - state.read); + if (err < 0) + goto out; + state.read += err; + if (state.read != sizeof(state.msg)) + return 0; + if (state.msg.len > XENSTORE_PAYLOAD_MAX) { + err = -EINVAL; + goto out; + } + } + + len = state.msg.len + 1; + if (state.msg.type == XS_WATCH_EVENT) + len += sizeof(*state.watch); + + state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH); + if (!state.alloc) + return -ENOMEM; + + if (state.msg.type == XS_WATCH_EVENT) + state.body = state.watch->body; + else + state.body = state.alloc; + state.in_hdr = false; + state.read = 0; + } + + err = xb_read(state.body + state.read, state.msg.len - state.read); + if (err < 0) + goto out; + + state.read += err; + if (state.read != state.msg.len) + return 0; + + state.body[state.msg.len] = '\0'; + + if (state.msg.type == XS_WATCH_EVENT) { + state.watch->len = state.msg.len; + err = xs_watch_msg(state.watch); + } else { + err = -ENOENT; + mutex_lock(&xb_write_mutex); + list_for_each_entry(req, &xs_reply_list, list) { + if (req->msg.req_id == state.msg.req_id) { + list_del(&req->list); + err = 0; + break; + } + } + mutex_unlock(&xb_write_mutex); + if (err) + goto out; + + if (req->state == xb_req_state_wait_reply) { + req->msg.req_id = req->caller_req_id; + req->msg.type = state.msg.type; + req->msg.len = state.msg.len; + req->body = state.body; + /* write body, then update state */ + virt_wmb(); + req->state = xb_req_state_got_reply; + req->cb(req); + } else + kfree(req); + } + + mutex_unlock(&xs_response_mutex); + + state.in_msg = false; + state.alloc = NULL; + return err; + + out: + mutex_unlock(&xs_response_mutex); + state.in_msg = false; + kfree(state.alloc); + state.alloc = NULL; + return err; +} + +static int process_writes(void) +{ + static struct { + struct xb_req_data *req; + int idx; + unsigned int written; + } state; + void *base; + unsigned int len; + int err = 0; + + if (!xb_data_to_write()) + return 0; + + mutex_lock(&xb_write_mutex); + + if (!state.req) { + state.req = list_first_entry(&xb_write_list, + struct xb_req_data, list); + state.idx = -1; + state.written = 0; + } + + if (state.req->state == xb_req_state_aborted) + goto out_err; + + while (state.idx < state.req->num_vecs) { + if (state.idx < 0) { + base = &state.req->msg; + len = sizeof(state.req->msg); + } else { + base = state.req->vec[state.idx].iov_base; + len = state.req->vec[state.idx].iov_len; + } + err = xb_write(base + state.written, len - state.written); + if (err < 0) + goto out_err; + state.written += err; + if (state.written != len) + goto out; + + state.idx++; + state.written = 0; + } + + list_del(&state.req->list); + state.req->state = xb_req_state_wait_reply; + list_add_tail(&state.req->list, &xs_reply_list); + state.req = NULL; + + out: + mutex_unlock(&xb_write_mutex); + + return 0; + + out_err: + state.req->msg.type = XS_ERROR; + state.req->err = err; + list_del(&state.req->list); + if (state.req->state == xb_req_state_aborted) + kfree(state.req); + else { + /* write err, then update state */ + virt_wmb(); + state.req->state = xb_req_state_got_reply; + wake_up(&state.req->wq); + } + + mutex_unlock(&xb_write_mutex); + + state.req = NULL; + + return err; +} + +static int xb_thread_work(void) +{ + return xb_data_to_read() || xb_data_to_write(); +} + +static int xenbus_thread(void *unused) +{ + int err; + + while (!kthread_should_stop()) { + if (wait_event_interruptible(xb_waitq, xb_thread_work())) + continue; + + err = process_msg(); + if (err == -ENOMEM) + schedule(); + else if (err) + pr_warn_ratelimited("error %d while reading message\n", + err); + + err = process_writes(); + if (err) + pr_warn_ratelimited("error %d while writing message\n", + err); + } + + xenbus_task = NULL; + return 0; +} + +/** + * xb_init_comms - Set up interrupt handler off store event channel. + */ +int xb_init_comms(void) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + + if (intf->req_prod != intf->req_cons) + pr_err("request ring is not quiescent (%08x:%08x)!\n", + intf->req_cons, intf->req_prod); + + if (intf->rsp_prod != intf->rsp_cons) { + pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n", + intf->rsp_cons, intf->rsp_prod); + /* breaks kdump */ + if (!reset_devices) + intf->rsp_cons = intf->rsp_prod; + } + + if (xenbus_irq) { + /* Already have an irq; assume we're resuming */ + rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); + } else { + int err; + + err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, + 0, "xenbus", &xb_waitq); + if (err < 0) { + pr_err("request irq failed %i\n", err); + return err; + } + + xenbus_irq = err; + + if (!xenbus_task) { + xenbus_task = kthread_run(xenbus_thread, NULL, + "xenbus"); + if (IS_ERR(xenbus_task)) + return PTR_ERR(xenbus_task); + } + } + + return 0; +} + +void xb_deinit_comms(void) +{ + unbind_from_irqhandler(xenbus_irq, &xb_waitq); + xenbus_irq = 0; +} diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c new file mode 100644 index 000000000..edba5fecd --- /dev/null +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/init.h> +#include <linux/capability.h> + +#include <xen/xen.h> +#include <xen/page.h> +#include <xen/xenbus.h> +#include <xen/xenbus_dev.h> +#include <xen/grant_table.h> +#include <xen/events.h> +#include <asm/xen/hypervisor.h> + +#include "xenbus.h" + +static int xenbus_backend_open(struct inode *inode, struct file *filp) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return nonseekable_open(inode, filp); +} + +static long xenbus_alloc(domid_t domid) +{ + struct evtchn_alloc_unbound arg; + int err = -EEXIST; + + xs_suspend(); + + /* If xenstored_ready is nonzero, that means we have already talked to + * xenstore and set up watches. These watches will be restored by + * xs_resume, but that requires communication over the port established + * below that is not visible to anyone until the ioctl returns. + * + * This can be resolved by splitting the ioctl into two parts + * (postponing the resume until xenstored is active) but this is + * unnecessarily complex for the intended use where xenstored is only + * started once - so return -EEXIST if it's already running. + */ + if (xenstored_ready) + goto out_err; + + gnttab_grant_foreign_access_ref(GNTTAB_RESERVED_XENSTORE, domid, + virt_to_gfn(xen_store_interface), 0 /* writable */); + + arg.dom = DOMID_SELF; + arg.remote_dom = domid; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &arg); + if (err) + goto out_err; + + if (xen_store_evtchn > 0) + xb_deinit_comms(); + + xen_store_evtchn = arg.port; + + xs_resume(); + + return arg.port; + + out_err: + xs_suspend_cancel(); + return err; +} + +static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, + unsigned long data) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (cmd) { + case IOCTL_XENBUS_BACKEND_EVTCHN: + if (xen_store_evtchn > 0) + return xen_store_evtchn; + return -ENODEV; + case IOCTL_XENBUS_BACKEND_SETUP: + return xenbus_alloc(data); + default: + return -ENOTTY; + } +} + +static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) + return -EINVAL; + + if (remap_pfn_range(vma, vma->vm_start, + virt_to_pfn(xen_store_interface), + size, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static const struct file_operations xenbus_backend_fops = { + .open = xenbus_backend_open, + .mmap = xenbus_backend_mmap, + .unlocked_ioctl = xenbus_backend_ioctl, +}; + +static struct miscdevice xenbus_backend_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus_backend", + .fops = &xenbus_backend_fops, +}; + +static int __init xenbus_backend_init(void) +{ + int err; + + if (!xen_initial_domain()) + return -ENODEV; + + err = misc_register(&xenbus_backend_dev); + if (err) + pr_err("Could not register xenbus backend device\n"); + return err; +} +device_initcall(xenbus_backend_init); diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c new file mode 100644 index 000000000..0792fda49 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -0,0 +1,725 @@ +/* + * Driver giving user-space access to the kernel's xenbus connection + * to xenstore. + * + * Copyright (c) 2005, Christian Limpach + * Copyright (c) 2005, Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Changes: + * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem + * and /proc/xen compatibility mount point. + * Turned xenfs into a loadable module. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/uio.h> +#include <linux/notifier.h> +#include <linux/wait.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/mount.h> +#include <linux/pagemap.h> +#include <linux/uaccess.h> +#include <linux/init.h> +#include <linux/namei.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/miscdevice.h> +#include <linux/workqueue.h> + +#include <xen/xenbus.h> +#include <xen/xen.h> +#include <asm/xen/hypervisor.h> + +#include "xenbus.h" + +unsigned int xb_dev_generation_id; + +/* + * An element of a list of outstanding transactions, for which we're + * still waiting a reply. + */ +struct xenbus_transaction_holder { + struct list_head list; + struct xenbus_transaction handle; + unsigned int generation_id; +}; + +/* + * A buffer of data on the queue. + */ +struct read_buffer { + struct list_head list; + unsigned int cons; + unsigned int len; + char msg[]; +}; + +struct xenbus_file_priv { + /* + * msgbuffer_mutex is held while partial requests are built up + * and complete requests are acted on. It therefore protects + * the "transactions" and "watches" lists, and the partial + * request length and buffer. + * + * reply_mutex protects the reply being built up to return to + * usermode. It nests inside msgbuffer_mutex but may be held + * alone during a watch callback. + */ + struct mutex msgbuffer_mutex; + + /* In-progress transactions */ + struct list_head transactions; + + /* Active watches. */ + struct list_head watches; + + /* Partial request. */ + unsigned int len; + union { + struct xsd_sockmsg msg; + char buffer[XENSTORE_PAYLOAD_MAX]; + } u; + + /* Response queue. */ + struct mutex reply_mutex; + struct list_head read_buffers; + wait_queue_head_t read_waitq; + + struct kref kref; + + struct work_struct wq; +}; + +/* Read out any raw xenbus messages queued up. */ +static ssize_t xenbus_file_read(struct file *filp, + char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_file_priv *u = filp->private_data; + struct read_buffer *rb; + ssize_t i; + int ret; + + mutex_lock(&u->reply_mutex); +again: + while (list_empty(&u->read_buffers)) { + mutex_unlock(&u->reply_mutex); + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + ret = wait_event_interruptible(u->read_waitq, + !list_empty(&u->read_buffers)); + if (ret) + return ret; + mutex_lock(&u->reply_mutex); + } + + rb = list_entry(u->read_buffers.next, struct read_buffer, list); + i = 0; + while (i < len) { + size_t sz = min_t(size_t, len - i, rb->len - rb->cons); + + ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); + + i += sz - ret; + rb->cons += sz - ret; + + if (ret != 0) { + if (i == 0) + i = -EFAULT; + goto out; + } + + /* Clear out buffer if it has been consumed */ + if (rb->cons == rb->len) { + list_del(&rb->list); + kfree(rb); + if (list_empty(&u->read_buffers)) + break; + rb = list_entry(u->read_buffers.next, + struct read_buffer, list); + } + } + if (i == 0) + goto again; + +out: + mutex_unlock(&u->reply_mutex); + return i; +} + +/* + * Add a buffer to the queue. Caller must hold the appropriate lock + * if the queue is not local. (Commonly the caller will build up + * multiple queued buffers on a temporary local list, and then add it + * to the appropriate list under lock once all the buffers have een + * successfully allocated.) + */ +static int queue_reply(struct list_head *queue, const void *data, size_t len) +{ + struct read_buffer *rb; + + if (len == 0) + return 0; + if (len > XENSTORE_PAYLOAD_MAX) + return -EINVAL; + + rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); + if (rb == NULL) + return -ENOMEM; + + rb->cons = 0; + rb->len = len; + + memcpy(rb->msg, data, len); + + list_add_tail(&rb->list, queue); + return 0; +} + +/* + * Free all the read_buffer s on a list. + * Caller must have sole reference to list. + */ +static void queue_cleanup(struct list_head *list) +{ + struct read_buffer *rb; + + while (!list_empty(list)) { + rb = list_entry(list->next, struct read_buffer, list); + list_del(list->next); + kfree(rb); + } +} + +struct watch_adapter { + struct list_head list; + struct xenbus_watch watch; + struct xenbus_file_priv *dev_data; + char *token; +}; + +static void free_watch_adapter(struct watch_adapter *watch) +{ + kfree(watch->watch.node); + kfree(watch->token); + kfree(watch); +} + +static struct watch_adapter *alloc_watch_adapter(const char *path, + const char *token) +{ + struct watch_adapter *watch; + + watch = kzalloc(sizeof(*watch), GFP_KERNEL); + if (watch == NULL) + goto out_fail; + + watch->watch.node = kstrdup(path, GFP_KERNEL); + if (watch->watch.node == NULL) + goto out_free; + + watch->token = kstrdup(token, GFP_KERNEL); + if (watch->token == NULL) + goto out_free; + + return watch; + +out_free: + free_watch_adapter(watch); + +out_fail: + return NULL; +} + +static void watch_fired(struct xenbus_watch *watch, + const char *path, + const char *token) +{ + struct watch_adapter *adap; + struct xsd_sockmsg hdr; + const char *token_caller; + int path_len, tok_len, body_len; + int ret; + LIST_HEAD(staging_q); + + adap = container_of(watch, struct watch_adapter, watch); + + token_caller = adap->token; + + path_len = strlen(path) + 1; + tok_len = strlen(token_caller) + 1; + body_len = path_len + tok_len; + + hdr.type = XS_WATCH_EVENT; + hdr.len = body_len; + + mutex_lock(&adap->dev_data->reply_mutex); + + ret = queue_reply(&staging_q, &hdr, sizeof(hdr)); + if (!ret) + ret = queue_reply(&staging_q, path, path_len); + if (!ret) + ret = queue_reply(&staging_q, token_caller, tok_len); + + if (!ret) { + /* success: pass reply list onto watcher */ + list_splice_tail(&staging_q, &adap->dev_data->read_buffers); + wake_up(&adap->dev_data->read_waitq); + } else + queue_cleanup(&staging_q); + + mutex_unlock(&adap->dev_data->reply_mutex); +} + +static void xenbus_worker(struct work_struct *wq) +{ + struct xenbus_file_priv *u; + struct xenbus_transaction_holder *trans, *tmp; + struct watch_adapter *watch, *tmp_watch; + struct read_buffer *rb, *tmp_rb; + + u = container_of(wq, struct xenbus_file_priv, wq); + + /* + * No need for locking here because there are no other users, + * by definition. + */ + + list_for_each_entry_safe(trans, tmp, &u->transactions, list) { + xenbus_transaction_end(trans->handle, 1); + list_del(&trans->list); + kfree(trans); + } + + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + } + + list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { + list_del(&rb->list); + kfree(rb); + } + kfree(u); +} + +static void xenbus_file_free(struct kref *kref) +{ + struct xenbus_file_priv *u; + + /* + * We might be called in xenbus_thread(). + * Use workqueue to avoid deadlock. + */ + u = container_of(kref, struct xenbus_file_priv, kref); + schedule_work(&u->wq); +} + +static struct xenbus_transaction_holder *xenbus_get_transaction( + struct xenbus_file_priv *u, uint32_t tx_id) +{ + struct xenbus_transaction_holder *trans; + + list_for_each_entry(trans, &u->transactions, list) + if (trans->handle.id == tx_id) + return trans; + + return NULL; +} + +void xenbus_dev_queue_reply(struct xb_req_data *req) +{ + struct xenbus_file_priv *u = req->par; + struct xenbus_transaction_holder *trans = NULL; + int rc; + LIST_HEAD(staging_q); + + xs_request_exit(req); + + mutex_lock(&u->msgbuffer_mutex); + + if (req->type == XS_TRANSACTION_START) { + trans = xenbus_get_transaction(u, 0); + if (WARN_ON(!trans)) + goto out; + if (req->msg.type == XS_ERROR) { + list_del(&trans->list); + kfree(trans); + } else { + rc = kstrtou32(req->body, 10, &trans->handle.id); + if (WARN_ON(rc)) + goto out; + } + } else if (req->type == XS_TRANSACTION_END) { + trans = xenbus_get_transaction(u, req->msg.tx_id); + if (WARN_ON(!trans)) + goto out; + list_del(&trans->list); + kfree(trans); + } + + mutex_unlock(&u->msgbuffer_mutex); + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&staging_q, &req->msg, sizeof(req->msg)); + if (!rc) + rc = queue_reply(&staging_q, req->body, req->msg.len); + if (!rc) { + list_splice_tail(&staging_q, &u->read_buffers); + wake_up(&u->read_waitq); + } else { + queue_cleanup(&staging_q); + } + mutex_unlock(&u->reply_mutex); + + kfree(req->body); + kfree(req); + + kref_put(&u->kref, xenbus_file_free); + + return; + + out: + mutex_unlock(&u->msgbuffer_mutex); +} + +static int xenbus_command_reply(struct xenbus_file_priv *u, + unsigned int msg_type, const char *reply) +{ + struct { + struct xsd_sockmsg hdr; + char body[16]; + } msg; + int rc; + + msg.hdr = u->u.msg; + msg.hdr.type = msg_type; + msg.hdr.len = strlen(reply) + 1; + if (msg.hdr.len > sizeof(msg.body)) + return -E2BIG; + memcpy(&msg.body, reply, msg.hdr.len); + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&u->read_buffers, &msg, sizeof(msg.hdr) + msg.hdr.len); + wake_up(&u->read_waitq); + mutex_unlock(&u->reply_mutex); + + if (!rc) + kref_put(&u->kref, xenbus_file_free); + + return rc; +} + +static int xenbus_write_transaction(unsigned msg_type, + struct xenbus_file_priv *u) +{ + int rc; + struct xenbus_transaction_holder *trans = NULL; + struct { + struct xsd_sockmsg hdr; + char body[]; + } *msg = (void *)u->u.buffer; + + if (msg_type == XS_TRANSACTION_START) { + trans = kzalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) { + rc = -ENOMEM; + goto out; + } + trans->generation_id = xb_dev_generation_id; + list_add(&trans->list, &u->transactions); + } else if (msg->hdr.tx_id != 0 && + !xenbus_get_transaction(u, msg->hdr.tx_id)) + return xenbus_command_reply(u, XS_ERROR, "ENOENT"); + else if (msg_type == XS_TRANSACTION_END && + !(msg->hdr.len == 2 && + (!strcmp(msg->body, "T") || !strcmp(msg->body, "F")))) + return xenbus_command_reply(u, XS_ERROR, "EINVAL"); + else if (msg_type == XS_TRANSACTION_END) { + trans = xenbus_get_transaction(u, msg->hdr.tx_id); + if (trans && trans->generation_id != xb_dev_generation_id) { + list_del(&trans->list); + kfree(trans); + if (!strcmp(msg->body, "T")) + return xenbus_command_reply(u, XS_ERROR, + "EAGAIN"); + else + return xenbus_command_reply(u, + XS_TRANSACTION_END, + "OK"); + } + } + + rc = xenbus_dev_request_and_reply(&msg->hdr, u); + if (rc && trans) { + list_del(&trans->list); + kfree(trans); + } + +out: + return rc; +} + +static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) +{ + struct watch_adapter *watch; + char *path, *token; + int err, rc; + + path = u->u.buffer + sizeof(u->u.msg); + token = memchr(path, 0, u->u.msg.len); + if (token == NULL) { + rc = xenbus_command_reply(u, XS_ERROR, "EINVAL"); + goto out; + } + token++; + if (memchr(token, 0, u->u.msg.len - (token - path)) == NULL) { + rc = xenbus_command_reply(u, XS_ERROR, "EINVAL"); + goto out; + } + + if (msg_type == XS_WATCH) { + watch = alloc_watch_adapter(path, token); + if (watch == NULL) { + rc = -ENOMEM; + goto out; + } + + watch->watch.callback = watch_fired; + watch->dev_data = u; + + err = register_xenbus_watch(&watch->watch); + if (err) { + free_watch_adapter(watch); + rc = err; + goto out; + } + list_add(&watch->list, &u->watches); + } else { + list_for_each_entry(watch, &u->watches, list) { + if (!strcmp(watch->token, token) && + !strcmp(watch->watch.node, path)) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + break; + } + } + } + + /* Success. Synthesize a reply to say all is OK. */ + rc = xenbus_command_reply(u, msg_type, "OK"); + +out: + return rc; +} + +static ssize_t xenbus_file_write(struct file *filp, + const char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_file_priv *u = filp->private_data; + uint32_t msg_type; + int rc = len; + int ret; + + /* + * We're expecting usermode to be writing properly formed + * xenbus messages. If they write an incomplete message we + * buffer it up. Once it is complete, we act on it. + */ + + /* + * Make sure concurrent writers can't stomp all over each + * other's messages and make a mess of our partial message + * buffer. We don't make any attemppt to stop multiple + * writers from making a mess of each other's incomplete + * messages; we're just trying to guarantee our own internal + * consistency and make sure that single writes are handled + * atomically. + */ + mutex_lock(&u->msgbuffer_mutex); + + /* Get this out of the way early to avoid confusion */ + if (len == 0) + goto out; + + /* Can't write a xenbus message larger we can buffer */ + if (len > sizeof(u->u.buffer) - u->len) { + /* On error, dump existing buffer */ + u->len = 0; + rc = -EINVAL; + goto out; + } + + ret = copy_from_user(u->u.buffer + u->len, ubuf, len); + + if (ret != 0) { + rc = -EFAULT; + goto out; + } + + /* Deal with a partial copy. */ + len -= ret; + rc = len; + + u->len += len; + + /* Return if we haven't got a full message yet */ + if (u->len < sizeof(u->u.msg)) + goto out; /* not even the header yet */ + + /* If we're expecting a message that's larger than we can + possibly send, dump what we have and return an error. */ + if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) { + rc = -E2BIG; + u->len = 0; + goto out; + } + + if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) + goto out; /* incomplete data portion */ + + /* + * OK, now we have a complete message. Do something with it. + */ + + kref_get(&u->kref); + + msg_type = u->u.msg.type; + + switch (msg_type) { + case XS_WATCH: + case XS_UNWATCH: + /* (Un)Ask for some path to be watched for changes */ + ret = xenbus_write_watch(msg_type, u); + break; + + default: + /* Send out a transaction */ + ret = xenbus_write_transaction(msg_type, u); + break; + } + if (ret != 0) { + rc = ret; + kref_put(&u->kref, xenbus_file_free); + } + + /* Buffered message consumed */ + u->len = 0; + + out: + mutex_unlock(&u->msgbuffer_mutex); + return rc; +} + +static int xenbus_file_open(struct inode *inode, struct file *filp) +{ + struct xenbus_file_priv *u; + + if (xen_store_evtchn == 0) + return -ENOENT; + + stream_open(inode, filp); + + u = kzalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return -ENOMEM; + + kref_init(&u->kref); + + INIT_LIST_HEAD(&u->transactions); + INIT_LIST_HEAD(&u->watches); + INIT_LIST_HEAD(&u->read_buffers); + init_waitqueue_head(&u->read_waitq); + INIT_WORK(&u->wq, xenbus_worker); + + mutex_init(&u->reply_mutex); + mutex_init(&u->msgbuffer_mutex); + + filp->private_data = u; + + return 0; +} + +static int xenbus_file_release(struct inode *inode, struct file *filp) +{ + struct xenbus_file_priv *u = filp->private_data; + + kref_put(&u->kref, xenbus_file_free); + + return 0; +} + +static __poll_t xenbus_file_poll(struct file *file, poll_table *wait) +{ + struct xenbus_file_priv *u = file->private_data; + + poll_wait(file, &u->read_waitq, wait); + if (!list_empty(&u->read_buffers)) + return EPOLLIN | EPOLLRDNORM; + return 0; +} + +const struct file_operations xen_xenbus_fops = { + .read = xenbus_file_read, + .write = xenbus_file_write, + .open = xenbus_file_open, + .release = xenbus_file_release, + .poll = xenbus_file_poll, + .llseek = no_llseek, +}; +EXPORT_SYMBOL_GPL(xen_xenbus_fops); + +static struct miscdevice xenbus_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus", + .fops = &xen_xenbus_fops, +}; + +static int __init xenbus_init(void) +{ + int err; + + if (!xen_domain()) + return -ENODEV; + + err = misc_register(&xenbus_dev); + if (err) + pr_err("Could not register xenbus frontend device\n"); + return err; +} +device_initcall(xenbus_init); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c new file mode 100644 index 000000000..3205e5d72 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -0,0 +1,1072 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005, 2006 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt + +#define DPRINTK(fmt, args...) \ + pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ + __func__, __LINE__, ##args) + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/notifier.h> +#include <linux/kthread.h> +#include <linux/mutex.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/module.h> + +#include <asm/page.h> +#include <asm/xen/hypervisor.h> + +#include <xen/xen.h> +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/xen-ops.h> +#include <xen/page.h> + +#include <xen/hvm.h> + +#include "xenbus.h" + + +static int xs_init_irq; +int xen_store_evtchn; +EXPORT_SYMBOL_GPL(xen_store_evtchn); + +struct xenstore_domain_interface *xen_store_interface; +EXPORT_SYMBOL_GPL(xen_store_interface); + +enum xenstore_init xen_store_domain_type; +EXPORT_SYMBOL_GPL(xen_store_domain_type); + +static unsigned long xen_store_gfn; + +static BLOCKING_NOTIFIER_HEAD(xenstore_chain); + +/* If something in array of ids matches this device, return it. */ +static const struct xenbus_device_id * +match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) +{ + for (; *arr->devicetype != '\0'; arr++) { + if (!strcmp(arr->devicetype, dev->devicetype)) + return arr; + } + return NULL; +} + +int xenbus_match(struct device *_dev, struct device_driver *_drv) +{ + struct xenbus_driver *drv = to_xenbus_driver(_drv); + + if (!drv->ids) + return 0; + + return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; +} +EXPORT_SYMBOL_GPL(xenbus_match); + + +static void free_otherend_details(struct xenbus_device *dev) +{ + kfree(dev->otherend); + dev->otherend = NULL; +} + + +static void free_otherend_watch(struct xenbus_device *dev) +{ + if (dev->otherend_watch.node) { + unregister_xenbus_watch(&dev->otherend_watch); + kfree(dev->otherend_watch.node); + dev->otherend_watch.node = NULL; + } +} + + +static int talk_to_otherend(struct xenbus_device *dev) +{ + struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); + + free_otherend_watch(dev); + free_otherend_details(dev); + + return drv->read_otherend_details(dev); +} + + + +static int watch_otherend(struct xenbus_device *dev) +{ + struct xen_bus_type *bus = + container_of(dev->dev.bus, struct xen_bus_type, bus); + + return xenbus_watch_pathfmt(dev, &dev->otherend_watch, + bus->otherend_will_handle, + bus->otherend_changed, + "%s/%s", dev->otherend, "state"); +} + + +int xenbus_read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node) +{ + int err = xenbus_gather(XBT_NIL, xendev->nodename, + id_node, "%i", &xendev->otherend_id, + path_node, NULL, &xendev->otherend, + NULL); + if (err) { + xenbus_dev_fatal(xendev, err, + "reading other end details from %s", + xendev->nodename); + return err; + } + if (strlen(xendev->otherend) == 0 || + !xenbus_exists(XBT_NIL, xendev->otherend, "")) { + xenbus_dev_fatal(xendev, -ENOENT, + "unable to read other end from %s. " + "missing or inaccessible.", + xendev->nodename); + free_otherend_details(xendev); + return -ENOENT; + } + + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_read_otherend_details); + +void xenbus_otherend_changed(struct xenbus_watch *watch, + const char *path, const char *token, + int ignore_on_shutdown) +{ + struct xenbus_device *dev = + container_of(watch, struct xenbus_device, otherend_watch); + struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); + enum xenbus_state state; + + /* Protect us against watches firing on old details when the otherend + details change, say immediately after a resume. */ + if (!dev->otherend || + strncmp(dev->otherend, path, strlen(dev->otherend))) { + dev_dbg(&dev->dev, "Ignoring watch at %s\n", path); + return; + } + + state = xenbus_read_driver_state(dev->otherend); + + dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n", + state, xenbus_strstate(state), dev->otherend_watch.node, path); + + /* + * Ignore xenbus transitions during shutdown. This prevents us doing + * work that can fail e.g., when the rootfs is gone. + */ + if (system_state > SYSTEM_RUNNING) { + if (ignore_on_shutdown && (state == XenbusStateClosing)) + xenbus_frontend_closed(dev); + return; + } + + if (drv->otherend_changed) + drv->otherend_changed(dev, state); +} +EXPORT_SYMBOL_GPL(xenbus_otherend_changed); + +#define XENBUS_SHOW_STAT(name) \ +static ssize_t name##_show(struct device *_dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct xenbus_device *dev = to_xenbus_device(_dev); \ + \ + return sprintf(buf, "%d\n", atomic_read(&dev->name)); \ +} \ +static DEVICE_ATTR_RO(name) + +XENBUS_SHOW_STAT(event_channels); +XENBUS_SHOW_STAT(events); +XENBUS_SHOW_STAT(spurious_events); +XENBUS_SHOW_STAT(jiffies_eoi_delayed); + +static ssize_t spurious_threshold_show(struct device *_dev, + struct device_attribute *attr, + char *buf) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + + return sprintf(buf, "%d\n", dev->spurious_threshold); +} + +static ssize_t spurious_threshold_store(struct device *_dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + unsigned int val; + ssize_t ret; + + ret = kstrtouint(buf, 0, &val); + if (ret) + return ret; + + dev->spurious_threshold = val; + + return count; +} + +static DEVICE_ATTR_RW(spurious_threshold); + +static struct attribute *xenbus_attrs[] = { + &dev_attr_event_channels.attr, + &dev_attr_events.attr, + &dev_attr_spurious_events.attr, + &dev_attr_jiffies_eoi_delayed.attr, + &dev_attr_spurious_threshold.attr, + NULL +}; + +static const struct attribute_group xenbus_group = { + .name = "xenbus", + .attrs = xenbus_attrs, +}; + +int xenbus_dev_probe(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); + const struct xenbus_device_id *id; + int err; + + DPRINTK("%s", dev->nodename); + + if (!drv->probe) { + err = -ENODEV; + goto fail; + } + + id = match_device(drv->ids, dev); + if (!id) { + err = -ENODEV; + goto fail; + } + + err = talk_to_otherend(dev); + if (err) { + dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n", + dev->nodename); + return err; + } + + if (!try_module_get(drv->driver.owner)) { + dev_warn(&dev->dev, "failed to acquire module reference on '%s'\n", + drv->driver.name); + err = -ESRCH; + goto fail; + } + + down(&dev->reclaim_sem); + err = drv->probe(dev, id); + up(&dev->reclaim_sem); + if (err) + goto fail_put; + + err = watch_otherend(dev); + if (err) { + dev_warn(&dev->dev, "watch_otherend on %s failed.\n", + dev->nodename); + return err; + } + + dev->spurious_threshold = 1; + if (sysfs_create_group(&dev->dev.kobj, &xenbus_group)) + dev_warn(&dev->dev, "sysfs_create_group on %s failed.\n", + dev->nodename); + + return 0; +fail_put: + module_put(drv->driver.owner); +fail: + xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_dev_probe); + +void xenbus_dev_remove(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); + + DPRINTK("%s", dev->nodename); + + sysfs_remove_group(&dev->dev.kobj, &xenbus_group); + + free_otherend_watch(dev); + + if (drv->remove) { + down(&dev->reclaim_sem); + drv->remove(dev); + up(&dev->reclaim_sem); + } + + module_put(drv->driver.owner); + + free_otherend_details(dev); + + /* + * If the toolstack has forced the device state to closing then set + * the state to closed now to allow it to be cleaned up. + * Similarly, if the driver does not support re-bind, set the + * closed. + */ + if (!drv->allow_rebind || + xenbus_read_driver_state(dev->nodename) == XenbusStateClosing) + xenbus_switch_state(dev, XenbusStateClosed); +} +EXPORT_SYMBOL_GPL(xenbus_dev_remove); + +int xenbus_register_driver_common(struct xenbus_driver *drv, + struct xen_bus_type *bus, + struct module *owner, const char *mod_name) +{ + drv->driver.name = drv->name ? drv->name : drv->ids[0].devicetype; + drv->driver.bus = &bus->bus; + drv->driver.owner = owner; + drv->driver.mod_name = mod_name; + + return driver_register(&drv->driver); +} +EXPORT_SYMBOL_GPL(xenbus_register_driver_common); + +void xenbus_unregister_driver(struct xenbus_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL_GPL(xenbus_unregister_driver); + +struct xb_find_info { + struct xenbus_device *dev; + const char *nodename; +}; + +static int cmp_dev(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct xb_find_info *info = data; + + if (!strcmp(xendev->nodename, info->nodename)) { + info->dev = xendev; + get_device(dev); + return 1; + } + return 0; +} + +static struct xenbus_device *xenbus_device_find(const char *nodename, + struct bus_type *bus) +{ + struct xb_find_info info = { .dev = NULL, .nodename = nodename }; + + bus_for_each_dev(bus, NULL, &info, cmp_dev); + return info.dev; +} + +static int cleanup_dev(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct xb_find_info *info = data; + int len = strlen(info->nodename); + + DPRINTK("%s", info->nodename); + + /* Match the info->nodename path, or any subdirectory of that path. */ + if (strncmp(xendev->nodename, info->nodename, len)) + return 0; + + /* If the node name is longer, ensure it really is a subdirectory. */ + if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/')) + return 0; + + info->dev = xendev; + get_device(dev); + return 1; +} + +static void xenbus_cleanup_devices(const char *path, struct bus_type *bus) +{ + struct xb_find_info info = { .nodename = path }; + + do { + info.dev = NULL; + bus_for_each_dev(bus, NULL, &info, cleanup_dev); + if (info.dev) { + device_unregister(&info.dev->dev); + put_device(&info.dev->dev); + } + } while (info.dev); +} + +static void xenbus_dev_release(struct device *dev) +{ + if (dev) + kfree(to_xenbus_device(dev)); +} + +static ssize_t nodename_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); +} +static DEVICE_ATTR_RO(nodename); + +static ssize_t devtype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); +} +static DEVICE_ATTR_RO(devtype); + +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s:%s\n", dev->bus->name, + to_xenbus_device(dev)->devicetype); +} +static DEVICE_ATTR_RO(modalias); + +static ssize_t state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", + xenbus_strstate(to_xenbus_device(dev)->state)); +} +static DEVICE_ATTR_RO(state); + +static struct attribute *xenbus_dev_attrs[] = { + &dev_attr_nodename.attr, + &dev_attr_devtype.attr, + &dev_attr_modalias.attr, + &dev_attr_state.attr, + NULL, +}; + +static const struct attribute_group xenbus_dev_group = { + .attrs = xenbus_dev_attrs, +}; + +const struct attribute_group *xenbus_dev_groups[] = { + &xenbus_dev_group, + NULL, +}; +EXPORT_SYMBOL_GPL(xenbus_dev_groups); + +int xenbus_probe_node(struct xen_bus_type *bus, + const char *type, + const char *nodename) +{ + char devname[XEN_BUS_ID_SIZE]; + int err; + struct xenbus_device *xendev; + size_t stringlen; + char *tmpstring; + + enum xenbus_state state = xenbus_read_driver_state(nodename); + + if (state != XenbusStateInitialising) { + /* Device is not new, so ignore it. This can happen if a + device is going away after switching to Closed. */ + return 0; + } + + stringlen = strlen(nodename) + 1 + strlen(type) + 1; + xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL); + if (!xendev) + return -ENOMEM; + + xendev->state = XenbusStateInitialising; + + /* Copy the strings into the extra space. */ + + tmpstring = (char *)(xendev + 1); + strcpy(tmpstring, nodename); + xendev->nodename = tmpstring; + + tmpstring += strlen(tmpstring) + 1; + strcpy(tmpstring, type); + xendev->devicetype = tmpstring; + init_completion(&xendev->down); + + xendev->dev.bus = &bus->bus; + xendev->dev.release = xenbus_dev_release; + + err = bus->get_bus_id(devname, xendev->nodename); + if (err) + goto fail; + + dev_set_name(&xendev->dev, "%s", devname); + sema_init(&xendev->reclaim_sem, 1); + + /* Register with generic device framework. */ + err = device_register(&xendev->dev); + if (err) { + put_device(&xendev->dev); + xendev = NULL; + goto fail; + } + + return 0; +fail: + kfree(xendev); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_probe_node); + +static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) +{ + int err = 0; + char **dir; + unsigned int dir_n = 0; + int i; + + dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + for (i = 0; i < dir_n; i++) { + err = bus->probe(bus, type, dir[i]); + if (err) + break; + } + + kfree(dir); + return err; +} + +int xenbus_probe_devices(struct xen_bus_type *bus) +{ + int err = 0; + char **dir; + unsigned int i, dir_n; + + dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_device_type(bus, dir[i]); + if (err) + break; + } + + kfree(dir); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_probe_devices); + +static unsigned int char_count(const char *str, char c) +{ + unsigned int i, ret = 0; + + for (i = 0; str[i]; i++) + if (str[i] == c) + ret++; + return ret; +} + +static int strsep_len(const char *str, char c, unsigned int len) +{ + unsigned int i; + + for (i = 0; str[i]; i++) + if (str[i] == c) { + if (len == 0) + return i; + len--; + } + return (len == 0) ? i : -ERANGE; +} + +void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) +{ + int exists, rootlen; + struct xenbus_device *dev; + char type[XEN_BUS_ID_SIZE]; + const char *p, *root; + + if (char_count(node, '/') < 2) + return; + + exists = xenbus_exists(XBT_NIL, node, ""); + if (!exists) { + xenbus_cleanup_devices(node, &bus->bus); + return; + } + + /* backend/<type>/... or device/<type>/... */ + p = strchr(node, '/') + 1; + snprintf(type, XEN_BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p); + type[XEN_BUS_ID_SIZE-1] = '\0'; + + rootlen = strsep_len(node, '/', bus->levels); + if (rootlen < 0) + return; + root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node); + if (!root) + return; + + dev = xenbus_device_find(root, &bus->bus); + if (!dev) + xenbus_probe_node(bus, type, root); + else + put_device(&dev->dev); + + kfree(root); +} +EXPORT_SYMBOL_GPL(xenbus_dev_changed); + +int xenbus_dev_suspend(struct device *dev) +{ + int err = 0; + struct xenbus_driver *drv; + struct xenbus_device *xdev + = container_of(dev, struct xenbus_device, dev); + + DPRINTK("%s", xdev->nodename); + + if (dev->driver == NULL) + return 0; + drv = to_xenbus_driver(dev->driver); + if (drv->suspend) + err = drv->suspend(xdev); + if (err) + dev_warn(dev, "suspend failed: %i\n", err); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_dev_suspend); + +int xenbus_dev_resume(struct device *dev) +{ + int err; + struct xenbus_driver *drv; + struct xenbus_device *xdev + = container_of(dev, struct xenbus_device, dev); + + DPRINTK("%s", xdev->nodename); + + if (dev->driver == NULL) + return 0; + drv = to_xenbus_driver(dev->driver); + err = talk_to_otherend(xdev); + if (err) { + dev_warn(dev, "resume (talk_to_otherend) failed: %i\n", err); + return err; + } + + xdev->state = XenbusStateInitialising; + + if (drv->resume) { + err = drv->resume(xdev); + if (err) { + dev_warn(dev, "resume failed: %i\n", err); + return err; + } + } + + err = watch_otherend(xdev); + if (err) { + dev_warn(dev, "resume (watch_otherend) failed: %d\n", err); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_dev_resume); + +int xenbus_dev_cancel(struct device *dev) +{ + /* Do nothing */ + DPRINTK("cancel"); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_dev_cancel); + +/* A flag to determine if xenstored is 'ready' (i.e. has started) */ +int xenstored_ready; + + +int register_xenstore_notifier(struct notifier_block *nb) +{ + int ret = 0; + + if (xenstored_ready > 0) + ret = nb->notifier_call(nb, 0, NULL); + else + blocking_notifier_chain_register(&xenstore_chain, nb); + + return ret; +} +EXPORT_SYMBOL_GPL(register_xenstore_notifier); + +void unregister_xenstore_notifier(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&xenstore_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); + +static void xenbus_probe(void) +{ + xenstored_ready = 1; + + if (!xen_store_interface) { + xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT, + XEN_PAGE_SIZE, MEMREMAP_WB); + /* + * Now it is safe to free the IRQ used for xenstore late + * initialization. No need to unbind: it is about to be + * bound again from xb_init_comms. Note that calling + * unbind_from_irqhandler now would result in xen_evtchn_close() + * being called and the event channel not being enabled again + * afterwards, resulting in missed event notifications. + */ + free_irq(xs_init_irq, &xb_waitq); + } + + /* + * In the HVM case, xenbus_init() deferred its call to + * xs_init() in case callbacks were not operational yet. + * So do it now. + */ + if (xen_store_domain_type == XS_HVM) + xs_init(); + + /* Notify others that xenstore is up */ + blocking_notifier_call_chain(&xenstore_chain, 0, NULL); +} + +/* + * Returns true when XenStore init must be deferred in order to + * allow the PCI platform device to be initialised, before we + * can actually have event channel interrupts working. + */ +static bool xs_hvm_defer_init_for_callback(void) +{ +#ifdef CONFIG_XEN_PVHVM + return xen_store_domain_type == XS_HVM && + !xen_have_vector_callback; +#else + return false; +#endif +} + +static int xenbus_probe_thread(void *unused) +{ + DEFINE_WAIT(w); + + /* + * We actually just want to wait for *any* trigger of xb_waitq, + * and run xenbus_probe() the moment it occurs. + */ + prepare_to_wait(&xb_waitq, &w, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&xb_waitq, &w); + + DPRINTK("probing"); + xenbus_probe(); + return 0; +} + +static int __init xenbus_probe_initcall(void) +{ + if (!xen_domain()) + return -ENODEV; + + /* + * Probe XenBus here in the XS_PV case, and also XS_HVM unless we + * need to wait for the platform PCI device to come up or + * xen_store_interface is not ready. + */ + if (xen_store_domain_type == XS_PV || + (xen_store_domain_type == XS_HVM && + !xs_hvm_defer_init_for_callback() && + xen_store_interface != NULL)) + xenbus_probe(); + + /* + * For XS_LOCAL or when xen_store_interface is not ready, spawn a + * thread which will wait for xenstored or a xenstore-stubdom to be + * started, then probe. It will be triggered when communication + * starts happening, by waiting on xb_waitq. + */ + if (xen_store_domain_type == XS_LOCAL || xen_store_interface == NULL) { + struct task_struct *probe_task; + + probe_task = kthread_run(xenbus_probe_thread, NULL, + "xenbus_probe"); + if (IS_ERR(probe_task)) + return PTR_ERR(probe_task); + } + return 0; +} +device_initcall(xenbus_probe_initcall); + +int xen_set_callback_via(uint64_t via) +{ + struct xen_hvm_param a; + int ret; + + a.domid = DOMID_SELF; + a.index = HVM_PARAM_CALLBACK_IRQ; + a.value = via; + + ret = HYPERVISOR_hvm_op(HVMOP_set_param, &a); + if (ret) + return ret; + + /* + * If xenbus_probe_initcall() deferred the xenbus_probe() + * due to the callback not functioning yet, we can do it now. + */ + if (!xenstored_ready && xs_hvm_defer_init_for_callback()) + xenbus_probe(); + + return ret; +} +EXPORT_SYMBOL_GPL(xen_set_callback_via); + +/* Set up event channel for xenstored which is run as a local process + * (this is normally used only in dom0) + */ +static int __init xenstored_local_init(void) +{ + int err = -ENOMEM; + unsigned long page = 0; + struct evtchn_alloc_unbound alloc_unbound; + + /* Allocate Xenstore page */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + goto out_err; + + xen_store_gfn = virt_to_gfn((void *)page); + + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = DOMID_SELF; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto out_err; + + BUG_ON(err); + xen_store_evtchn = alloc_unbound.port; + + return 0; + + out_err: + if (page != 0) + free_page(page); + return err; +} + +static int xenbus_resume_cb(struct notifier_block *nb, + unsigned long action, void *data) +{ + int err = 0; + + if (xen_hvm_domain()) { + uint64_t v = 0; + + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (!err && v) + xen_store_evtchn = v; + else + pr_warn("Cannot update xenstore event channel: %d\n", + err); + } else + xen_store_evtchn = xen_start_info->store_evtchn; + + return err; +} + +static struct notifier_block xenbus_resume_nb = { + .notifier_call = xenbus_resume_cb, +}; + +static irqreturn_t xenbus_late_init(int irq, void *unused) +{ + int err; + uint64_t v = 0; + + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err || !v || !~v) + return IRQ_HANDLED; + xen_store_gfn = (unsigned long)v; + + wake_up(&xb_waitq); + return IRQ_HANDLED; +} + +static int __init xenbus_init(void) +{ + int err; + uint64_t v = 0; + bool wait = false; + xen_store_domain_type = XS_UNKNOWN; + + if (!xen_domain()) + return -ENODEV; + + xenbus_ring_ops_init(); + + if (xen_pv_domain()) + xen_store_domain_type = XS_PV; + if (xen_hvm_domain()) + xen_store_domain_type = XS_HVM; + if (xen_hvm_domain() && xen_initial_domain()) + xen_store_domain_type = XS_LOCAL; + if (xen_pv_domain() && !xen_start_info->store_evtchn) + xen_store_domain_type = XS_LOCAL; + if (xen_pv_domain() && xen_start_info->store_evtchn) + xenstored_ready = 1; + + switch (xen_store_domain_type) { + case XS_LOCAL: + err = xenstored_local_init(); + if (err) + goto out_error; + xen_store_interface = gfn_to_virt(xen_store_gfn); + break; + case XS_PV: + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_gfn = xen_start_info->store_mfn; + xen_store_interface = gfn_to_virt(xen_store_gfn); + break; + case XS_HVM: + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto out_error; + xen_store_evtchn = (int)v; + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err) + goto out_error; + /* + * Uninitialized hvm_params are zero and return no error. + * Although it is theoretically possible to have + * HVM_PARAM_STORE_PFN set to zero on purpose, in reality it is + * not zero when valid. If zero, it means that Xenstore hasn't + * been properly initialized. Instead of attempting to map a + * wrong guest physical address return error. + * + * Also recognize all bits set as an invalid/uninitialized value. + */ + if (!v) { + err = -ENOENT; + goto out_error; + } + if (v == ~0ULL) { + wait = true; + } else { + /* Avoid truncation on 32-bit. */ +#if BITS_PER_LONG == 32 + if (v > ULONG_MAX) { + pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n", + __func__, v); + err = -EINVAL; + goto out_error; + } +#endif + xen_store_gfn = (unsigned long)v; + xen_store_interface = + memremap(xen_store_gfn << XEN_PAGE_SHIFT, + XEN_PAGE_SIZE, MEMREMAP_WB); + if (xen_store_interface->connection != XENSTORE_CONNECTED) + wait = true; + } + if (wait) { + err = bind_evtchn_to_irqhandler(xen_store_evtchn, + xenbus_late_init, + 0, "xenstore_late_init", + &xb_waitq); + if (err < 0) { + pr_err("xenstore_late_init couldn't bind irq err=%d\n", + err); + goto out_error; + } + + xs_init_irq = err; + } + break; + default: + pr_warn("Xenstore state unknown\n"); + break; + } + + /* + * HVM domains may not have a functional callback yet. In that + * case let xs_init() be called from xenbus_probe(), which will + * get invoked at an appropriate time. + */ + if (xen_store_domain_type != XS_HVM) { + err = xs_init(); + if (err) { + pr_warn("Error initializing xenstore comms: %i\n", err); + goto out_error; + } + } + + if ((xen_store_domain_type != XS_LOCAL) && + (xen_store_domain_type != XS_UNKNOWN)) + xen_resume_notifier_register(&xenbus_resume_nb); + +#ifdef CONFIG_XEN_COMPAT_XENFS + /* + * Create xenfs mountpoint in /proc for compatibility with + * utilities that expect to find "xenbus" under "/proc/xen". + */ + proc_create_mount_point("xen"); +#endif + return 0; + +out_error: + xen_store_domain_type = XS_UNKNOWN; + return err; +} + +postcore_initcall(xenbus_init); + +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c new file mode 100644 index 000000000..9c09f89d8 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -0,0 +1,313 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have (backend half). + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005, 2006 XenSource Ltd + * Copyright (C) 2007 Solarflare Communications, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define DPRINTK(fmt, ...) \ + pr_debug("(%s:%d) " fmt "\n", \ + __func__, __LINE__, ##__VA_ARGS__) + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/notifier.h> +#include <linux/export.h> +#include <linux/semaphore.h> + +#include <asm/page.h> +#include <asm/xen/hypervisor.h> +#include <asm/hypervisor.h> +#include <xen/xenbus.h> +#include <xen/features.h> + +#include "xenbus.h" + +/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */ +static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) +{ + int domid, err; + const char *devid, *type, *frontend; + unsigned int typelen; + + type = strchr(nodename, '/'); + if (!type) + return -EINVAL; + type++; + typelen = strcspn(type, "/"); + if (!typelen || type[typelen] != '/') + return -EINVAL; + + devid = strrchr(nodename, '/') + 1; + + err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid, + "frontend", NULL, &frontend, + NULL); + if (err) + return err; + if (strlen(frontend) == 0) + err = -ERANGE; + if (!err && !xenbus_exists(XBT_NIL, frontend, "")) + err = -ENOENT; + kfree(frontend); + + if (err) + return err; + + if (snprintf(bus_id, XEN_BUS_ID_SIZE, "%.*s-%i-%s", + typelen, type, domid, devid) >= XEN_BUS_ID_SIZE) + return -ENOSPC; + return 0; +} + +static int xenbus_uevent_backend(struct device *dev, + struct kobj_uevent_env *env) +{ + struct xenbus_device *xdev; + struct xenbus_driver *drv; + struct xen_bus_type *bus; + + DPRINTK(""); + + if (dev == NULL) + return -ENODEV; + + xdev = to_xenbus_device(dev); + bus = container_of(xdev->dev.bus, struct xen_bus_type, bus); + + if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) + return -ENOMEM; + + /* stuff we want to pass to /sbin/hotplug */ + if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) + return -ENOMEM; + + if (add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename)) + return -ENOMEM; + + if (add_uevent_var(env, "XENBUS_BASE_PATH=%s", bus->root)) + return -ENOMEM; + + if (dev->driver) { + drv = to_xenbus_driver(dev->driver); + if (drv && drv->uevent) + return drv->uevent(xdev, env); + } + + return 0; +} + +/* backend/<typename>/<frontend-uuid>/<name> */ +static int xenbus_probe_backend_unit(struct xen_bus_type *bus, + const char *dir, + const char *type, + const char *name) +{ + char *nodename; + int err; + + nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s\n", nodename); + + err = xenbus_probe_node(bus, type, nodename); + kfree(nodename); + return err; +} + +/* backend/<typename>/<frontend-domid> */ +static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, + const char *domid) +{ + char *nodename; + int err = 0; + char **dir; + unsigned int i, dir_n = 0; + + DPRINTK(""); + + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, domid); + if (!nodename) + return -ENOMEM; + + dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n); + if (IS_ERR(dir)) { + kfree(nodename); + return PTR_ERR(dir); + } + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_backend_unit(bus, nodename, type, dir[i]); + if (err) + break; + } + kfree(dir); + kfree(nodename); + return err; +} + +static bool frontend_will_handle(struct xenbus_watch *watch, + const char *path, const char *token) +{ + return watch->nr_pending == 0; +} + +static void frontend_changed(struct xenbus_watch *watch, + const char *path, const char *token) +{ + xenbus_otherend_changed(watch, path, token, 0); +} + +static struct xen_bus_type xenbus_backend = { + .root = "backend", + .levels = 3, /* backend/type/<frontend>/<id> */ + .get_bus_id = backend_bus_id, + .probe = xenbus_probe_backend, + .otherend_will_handle = frontend_will_handle, + .otherend_changed = frontend_changed, + .bus = { + .name = "xen-backend", + .match = xenbus_match, + .uevent = xenbus_uevent_backend, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, + .dev_groups = xenbus_dev_groups, + }, +}; + +static void backend_changed(struct xenbus_watch *watch, + const char *path, const char *token) +{ + DPRINTK(""); + + xenbus_dev_changed(path, &xenbus_backend); +} + +static struct xenbus_watch be_watch = { + .node = "backend", + .callback = backend_changed, +}; + +static int read_frontend_details(struct xenbus_device *xendev) +{ + return xenbus_read_otherend_details(xendev, "frontend-id", "frontend"); +} + +int xenbus_dev_is_online(struct xenbus_device *dev) +{ + return !!xenbus_read_unsigned(dev->nodename, "online", 0); +} +EXPORT_SYMBOL_GPL(xenbus_dev_is_online); + +int __xenbus_register_backend(struct xenbus_driver *drv, struct module *owner, + const char *mod_name) +{ + drv->read_otherend_details = read_frontend_details; + + return xenbus_register_driver_common(drv, &xenbus_backend, + owner, mod_name); +} +EXPORT_SYMBOL_GPL(__xenbus_register_backend); + +static int backend_probe_and_watch(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + /* Enumerate devices in xenstore and watch for changes. */ + xenbus_probe_devices(&xenbus_backend); + register_xenbus_watch(&be_watch); + + return NOTIFY_DONE; +} + +static int backend_reclaim_memory(struct device *dev, void *data) +{ + const struct xenbus_driver *drv; + struct xenbus_device *xdev; + + if (!dev->driver) + return 0; + drv = to_xenbus_driver(dev->driver); + if (drv && drv->reclaim_memory) { + xdev = to_xenbus_device(dev); + if (down_trylock(&xdev->reclaim_sem)) + return 0; + drv->reclaim_memory(xdev); + up(&xdev->reclaim_sem); + } + return 0; +} + +/* + * Returns 0 always because we are using shrinker to only detect memory + * pressure. + */ +static unsigned long backend_shrink_memory_count(struct shrinker *shrinker, + struct shrink_control *sc) +{ + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, + backend_reclaim_memory); + return 0; +} + +static struct shrinker backend_memory_shrinker = { + .count_objects = backend_shrink_memory_count, + .seeks = DEFAULT_SEEKS, +}; + +static int __init xenbus_probe_backend_init(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = backend_probe_and_watch + }; + int err; + + DPRINTK(""); + + /* Register ourselves with the kernel bus subsystem */ + err = bus_register(&xenbus_backend.bus); + if (err) + return err; + + register_xenstore_notifier(&xenstore_notifier); + + if (register_shrinker(&backend_memory_shrinker, "xen-backend")) + pr_warn("shrinker registration failed\n"); + + return 0; +} +subsys_initcall(xenbus_probe_backend_init); diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c new file mode 100644 index 000000000..f44d5a643 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -0,0 +1,516 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define DPRINTK(fmt, ...) \ + pr_debug("(%s:%d) " fmt "\n", \ + __func__, __LINE__, ##__VA_ARGS__) + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/notifier.h> +#include <linux/kthread.h> +#include <linux/mutex.h> +#include <linux/io.h> +#include <linux/module.h> + +#include <asm/page.h> +#include <asm/xen/hypervisor.h> +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/page.h> +#include <xen/xen.h> + +#include <xen/platform_pci.h> + +#include "xenbus.h" + + + +/* device/<type>/<id> => <type>-<id> */ +static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) +{ + nodename = strchr(nodename, '/'); + if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { + pr_warn("bad frontend %s\n", nodename); + return -EINVAL; + } + + strscpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); + if (!strchr(bus_id, '/')) { + pr_warn("bus_id %s no slash\n", bus_id); + return -EINVAL; + } + *strchr(bus_id, '/') = '-'; + return 0; +} + +/* device/<typename>/<name> */ +static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type, + const char *name) +{ + char *nodename; + int err; + + /* ignore console/0 */ + if (!strncmp(type, "console", 7) && !strncmp(name, "0", 1)) { + DPRINTK("Ignoring buggy device entry console/0"); + return 0; + } + + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s", nodename); + + err = xenbus_probe_node(bus, type, nodename); + kfree(nodename); + return err; +} + +static int xenbus_uevent_frontend(struct device *_dev, + struct kobj_uevent_env *env) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + + if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) + return -ENOMEM; + + return 0; +} + + +static void backend_changed(struct xenbus_watch *watch, + const char *path, const char *token) +{ + xenbus_otherend_changed(watch, path, token, 1); +} + +static void xenbus_frontend_delayed_resume(struct work_struct *w) +{ + struct xenbus_device *xdev = container_of(w, struct xenbus_device, work); + + xenbus_dev_resume(&xdev->dev); +} + +static int xenbus_frontend_dev_resume(struct device *dev) +{ + /* + * If xenstored is running in this domain, we cannot access the backend + * state at the moment, so we need to defer xenbus_dev_resume + */ + if (xen_store_domain_type == XS_LOCAL) { + struct xenbus_device *xdev = to_xenbus_device(dev); + + schedule_work(&xdev->work); + + return 0; + } + + return xenbus_dev_resume(dev); +} + +static int xenbus_frontend_dev_probe(struct device *dev) +{ + if (xen_store_domain_type == XS_LOCAL) { + struct xenbus_device *xdev = to_xenbus_device(dev); + INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume); + } + + return xenbus_dev_probe(dev); +} + +static void xenbus_frontend_dev_shutdown(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + unsigned long timeout = 5*HZ; + + DPRINTK("%s", dev->nodename); + + get_device(&dev->dev); + if (dev->state != XenbusStateConnected) { + pr_info("%s: %s: %s != Connected, skipping\n", + __func__, dev->nodename, xenbus_strstate(dev->state)); + goto out; + } + xenbus_switch_state(dev, XenbusStateClosing); + timeout = wait_for_completion_timeout(&dev->down, timeout); + if (!timeout) + pr_info("%s: %s timeout closing device\n", + __func__, dev->nodename); + out: + put_device(&dev->dev); +} + +static const struct dev_pm_ops xenbus_pm_ops = { + .suspend = xenbus_dev_suspend, + .resume = xenbus_frontend_dev_resume, + .freeze = xenbus_dev_suspend, + .thaw = xenbus_dev_cancel, + .restore = xenbus_dev_resume, +}; + +static struct xen_bus_type xenbus_frontend = { + .root = "device", + .levels = 2, /* device/type/<id> */ + .get_bus_id = frontend_bus_id, + .probe = xenbus_probe_frontend, + .otherend_changed = backend_changed, + .bus = { + .name = "xen", + .match = xenbus_match, + .uevent = xenbus_uevent_frontend, + .probe = xenbus_frontend_dev_probe, + .remove = xenbus_dev_remove, + .shutdown = xenbus_frontend_dev_shutdown, + .dev_groups = xenbus_dev_groups, + + .pm = &xenbus_pm_ops, + }, +}; + +static void frontend_changed(struct xenbus_watch *watch, + const char *path, const char *token) +{ + DPRINTK(""); + + xenbus_dev_changed(path, &xenbus_frontend); +} + + +/* We watch for devices appearing and vanishing. */ +static struct xenbus_watch fe_watch = { + .node = "device", + .callback = frontend_changed, +}; + +static int read_backend_details(struct xenbus_device *xendev) +{ + return xenbus_read_otherend_details(xendev, "backend-id", "backend"); +} + +static int is_device_connecting(struct device *dev, void *data, bool ignore_nonessential) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; + struct xenbus_driver *xendrv; + + /* + * A device with no driver will never connect. We care only about + * devices which should currently be in the process of connecting. + */ + if (!dev->driver) + return 0; + + /* Is this search limited to a particular driver? */ + if (drv && (dev->driver != drv)) + return 0; + + xendrv = to_xenbus_driver(dev->driver); + + if (ignore_nonessential && xendrv->not_essential) + return 0; + + return (xendev->state < XenbusStateConnected || + (xendev->state == XenbusStateConnected && + xendrv->is_ready && !xendrv->is_ready(xendev))); +} +static int essential_device_connecting(struct device *dev, void *data) +{ + return is_device_connecting(dev, data, true /* ignore PV[KBB+FB] */); +} +static int non_essential_device_connecting(struct device *dev, void *data) +{ + return is_device_connecting(dev, data, false); +} + +static int exists_essential_connecting_device(struct device_driver *drv) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + essential_device_connecting); +} +static int exists_non_essential_connecting_device(struct device_driver *drv) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + non_essential_device_connecting); +} + +static int print_device_status(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; + + /* Is this operation limited to a particular driver? */ + if (drv && (dev->driver != drv)) + return 0; + + if (!dev->driver) { + /* Information only: is this too noisy? */ + pr_info("Device with no driver: %s\n", xendev->nodename); + } else if (xendev->state < XenbusStateConnected) { + enum xenbus_state rstate = XenbusStateUnknown; + if (xendev->otherend) + rstate = xenbus_read_driver_state(xendev->otherend); + pr_warn("Timeout connecting to device: %s (local state %d, remote state %d)\n", + xendev->nodename, xendev->state, rstate); + } + + return 0; +} + +/* We only wait for device setup after most initcalls have run. */ +static int ready_to_wait_for_devices; + +static bool wait_loop(unsigned long start, unsigned int max_delay, + unsigned int *seconds_waited) +{ + if (time_after(jiffies, start + (*seconds_waited+5)*HZ)) { + if (!*seconds_waited) + pr_warn("Waiting for devices to initialise: "); + *seconds_waited += 5; + pr_cont("%us...", max_delay - *seconds_waited); + if (*seconds_waited == max_delay) { + pr_cont("\n"); + return true; + } + } + + schedule_timeout_interruptible(HZ/10); + + return false; +} +/* + * On a 5-minute timeout, wait for all devices currently configured. We need + * to do this to guarantee that the filesystems and / or network devices + * needed for boot are available, before we can allow the boot to proceed. + * + * This needs to be on a late_initcall, to happen after the frontend device + * drivers have been initialised, but before the root fs is mounted. + * + * A possible improvement here would be to have the tools add a per-device + * flag to the store entry, indicating whether it is needed at boot time. + * This would allow people who knew what they were doing to accelerate their + * boot slightly, but of course needs tools or manual intervention to set up + * those flags correctly. + */ +static void wait_for_devices(struct xenbus_driver *xendrv) +{ + unsigned long start = jiffies; + struct device_driver *drv = xendrv ? &xendrv->driver : NULL; + unsigned int seconds_waited = 0; + + if (!ready_to_wait_for_devices || !xen_domain()) + return; + + while (exists_non_essential_connecting_device(drv)) + if (wait_loop(start, 30, &seconds_waited)) + break; + + /* Skips PVKB and PVFB check.*/ + while (exists_essential_connecting_device(drv)) + if (wait_loop(start, 270, &seconds_waited)) + break; + + if (seconds_waited) + printk("\n"); + + bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + print_device_status); +} + +int __xenbus_register_frontend(struct xenbus_driver *drv, struct module *owner, + const char *mod_name) +{ + int ret; + + drv->read_otherend_details = read_backend_details; + + ret = xenbus_register_driver_common(drv, &xenbus_frontend, + owner, mod_name); + if (ret) + return ret; + + /* If this driver is loaded as a module wait for devices to attach. */ + wait_for_devices(drv); + + return 0; +} +EXPORT_SYMBOL_GPL(__xenbus_register_frontend); + +static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); +static int backend_state; + +static void xenbus_reset_backend_state_changed(struct xenbus_watch *w, + const char *path, const char *token) +{ + if (xenbus_scanf(XBT_NIL, path, "", "%i", + &backend_state) != 1) + backend_state = XenbusStateUnknown; + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + path, xenbus_strstate(backend_state)); + wake_up(&backend_state_wq); +} + +static void xenbus_reset_wait_for_backend(char *be, int expected) +{ + long timeout; + timeout = wait_event_interruptible_timeout(backend_state_wq, + backend_state == expected, 5 * HZ); + if (timeout <= 0) + pr_info("backend %s timed out\n", be); +} + +/* + * Reset frontend if it is in Connected or Closed state. + * Wait for backend to catch up. + * State Connected happens during kdump, Closed after kexec. + */ +static void xenbus_reset_frontend(char *fe, char *be, int be_state) +{ + struct xenbus_watch be_watch; + + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + be, xenbus_strstate(be_state)); + + memset(&be_watch, 0, sizeof(be_watch)); + be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be); + if (!be_watch.node) + return; + + be_watch.callback = xenbus_reset_backend_state_changed; + backend_state = XenbusStateUnknown; + + pr_info("triggering reconnect on %s\n", be); + register_xenbus_watch(&be_watch); + + /* fall through to forward backend to state XenbusStateInitialising */ + switch (be_state) { + case XenbusStateConnected: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing); + xenbus_reset_wait_for_backend(be, XenbusStateClosing); + fallthrough; + + case XenbusStateClosing: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed); + xenbus_reset_wait_for_backend(be, XenbusStateClosed); + fallthrough; + + case XenbusStateClosed: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising); + xenbus_reset_wait_for_backend(be, XenbusStateInitWait); + } + + unregister_xenbus_watch(&be_watch); + pr_info("reconnect done on %s\n", be); + kfree(be_watch.node); +} + +static void xenbus_check_frontend(char *class, char *dev) +{ + int be_state, fe_state, err; + char *backend, *frontend; + + frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev); + if (!frontend) + return; + + err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state); + if (err != 1) + goto out; + + switch (fe_state) { + case XenbusStateConnected: + case XenbusStateClosed: + printk(KERN_DEBUG "XENBUS: frontend %s %s\n", + frontend, xenbus_strstate(fe_state)); + backend = xenbus_read(XBT_NIL, frontend, "backend", NULL); + if (!backend || IS_ERR(backend)) + goto out; + err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state); + if (err == 1) + xenbus_reset_frontend(frontend, backend, be_state); + kfree(backend); + break; + default: + break; + } +out: + kfree(frontend); +} + +static void xenbus_reset_state(void) +{ + char **devclass, **dev; + int devclass_n, dev_n; + int i, j; + + devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n); + if (IS_ERR(devclass)) + return; + + for (i = 0; i < devclass_n; i++) { + dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n); + if (IS_ERR(dev)) + continue; + for (j = 0; j < dev_n; j++) + xenbus_check_frontend(devclass[i], dev[j]); + kfree(dev); + } + kfree(devclass); +} + +static int frontend_probe_and_watch(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + /* reset devices in Connected or Closed state */ + if (xen_hvm_domain()) + xenbus_reset_state(); + /* Enumerate devices in xenstore and watch for changes. */ + xenbus_probe_devices(&xenbus_frontend); + register_xenbus_watch(&fe_watch); + + return NOTIFY_DONE; +} + + +static int __init xenbus_probe_frontend_init(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = frontend_probe_and_watch + }; + int err; + + DPRINTK(""); + + /* Register ourselves with the kernel bus subsystem */ + err = bus_register(&xenbus_frontend.bus); + if (err) + return err; + + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} +subsys_initcall(xenbus_probe_frontend_init); + +#ifndef MODULE +static int __init boot_wait_for_devices(void) +{ + if (!xen_has_pv_devices()) + return -ENODEV; + + ready_to_wait_for_devices = 1; + wait_for_devices(NULL); + return 0; +} + +late_initcall(boot_wait_for_devices); +#endif + +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c new file mode 100644 index 000000000..12e02eb01 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -0,0 +1,956 @@ +/****************************************************************************** + * xenbus_xs.c + * + * This is the kernel equivalent of the "xs" library. We don't need everything + * and we use xenbus_comms for communication. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/unistd.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/uio.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/fcntl.h> +#include <linux/kthread.h> +#include <linux/reboot.h> +#include <linux/rwsem.h> +#include <linux/mutex.h> +#include <asm/xen/hypervisor.h> +#include <xen/xenbus.h> +#include <xen/xen.h> +#include "xenbus.h" + +/* + * Framework to protect suspend/resume handling against normal Xenstore + * message handling: + * During suspend/resume there must be no open transaction and no pending + * Xenstore request. + * New watch events happening in this time can be ignored by firing all watches + * after resume. + */ + +/* Lock protecting enter/exit critical region. */ +static DEFINE_SPINLOCK(xs_state_lock); +/* Number of users in critical region (protected by xs_state_lock). */ +static unsigned int xs_state_users; +/* Suspend handler waiting or already active (protected by xs_state_lock)? */ +static int xs_suspend_active; +/* Unique Xenstore request id (protected by xs_state_lock). */ +static uint32_t xs_request_id; + +/* Wait queue for all callers waiting for critical region to become usable. */ +static DECLARE_WAIT_QUEUE_HEAD(xs_state_enter_wq); +/* Wait queue for suspend handling waiting for critical region being empty. */ +static DECLARE_WAIT_QUEUE_HEAD(xs_state_exit_wq); + +/* List of registered watches, and a lock to protect it. */ +static LIST_HEAD(watches); +static DEFINE_SPINLOCK(watches_lock); + +/* List of pending watch callback events, and a lock to protect it. */ +static LIST_HEAD(watch_events); +static DEFINE_SPINLOCK(watch_events_lock); + +/* Protect watch (de)register against save/restore. */ +static DECLARE_RWSEM(xs_watch_rwsem); + +/* + * Details of the xenwatch callback kernel thread. The thread waits on the + * watch_events_waitq for work to do (queued on watch_events list). When it + * wakes up it acquires the xenwatch_mutex before reading the list and + * carrying out work. + */ +static pid_t xenwatch_pid; +static DEFINE_MUTEX(xenwatch_mutex); +static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq); + +static void xs_suspend_enter(void) +{ + spin_lock(&xs_state_lock); + xs_suspend_active++; + spin_unlock(&xs_state_lock); + wait_event(xs_state_exit_wq, xs_state_users == 0); +} + +static void xs_suspend_exit(void) +{ + xb_dev_generation_id++; + spin_lock(&xs_state_lock); + xs_suspend_active--; + spin_unlock(&xs_state_lock); + wake_up_all(&xs_state_enter_wq); +} + +static uint32_t xs_request_enter(struct xb_req_data *req) +{ + uint32_t rq_id; + + req->type = req->msg.type; + + spin_lock(&xs_state_lock); + + while (!xs_state_users && xs_suspend_active) { + spin_unlock(&xs_state_lock); + wait_event(xs_state_enter_wq, xs_suspend_active == 0); + spin_lock(&xs_state_lock); + } + + if (req->type == XS_TRANSACTION_START && !req->user_req) + xs_state_users++; + xs_state_users++; + rq_id = xs_request_id++; + + spin_unlock(&xs_state_lock); + + return rq_id; +} + +void xs_request_exit(struct xb_req_data *req) +{ + spin_lock(&xs_state_lock); + xs_state_users--; + if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) || + (req->type == XS_TRANSACTION_END && !req->user_req && + !WARN_ON_ONCE(req->msg.type == XS_ERROR && + !strcmp(req->body, "ENOENT")))) + xs_state_users--; + spin_unlock(&xs_state_lock); + + if (xs_suspend_active && !xs_state_users) + wake_up(&xs_state_exit_wq); +} + +static int get_error(const char *errorstring) +{ + unsigned int i; + + for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) { + if (i == ARRAY_SIZE(xsd_errors) - 1) { + pr_warn("xen store gave: unknown error %s\n", + errorstring); + return EINVAL; + } + } + return xsd_errors[i].errnum; +} + +static bool xenbus_ok(void) +{ + switch (xen_store_domain_type) { + case XS_LOCAL: + switch (system_state) { + case SYSTEM_POWER_OFF: + case SYSTEM_RESTART: + case SYSTEM_HALT: + return false; + default: + break; + } + return true; + case XS_PV: + case XS_HVM: + /* FIXME: Could check that the remote domain is alive, + * but it is normally initial domain. */ + return true; + default: + break; + } + return false; +} + +static bool test_reply(struct xb_req_data *req) +{ + if (req->state == xb_req_state_got_reply || !xenbus_ok()) { + /* read req->state before all other fields */ + virt_rmb(); + return true; + } + + /* Make sure to reread req->state each time. */ + barrier(); + + return false; +} + +static void *read_reply(struct xb_req_data *req) +{ + do { + wait_event(req->wq, test_reply(req)); + + if (!xenbus_ok()) + /* + * If we are in the process of being shut-down there is + * no point of trying to contact XenBus - it is either + * killed (xenstored application) or the other domain + * has been killed or is unreachable. + */ + return ERR_PTR(-EIO); + if (req->err) + return ERR_PTR(req->err); + + } while (req->state != xb_req_state_got_reply); + + return req->body; +} + +static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg) +{ + bool notify; + + req->msg = *msg; + req->err = 0; + req->state = xb_req_state_queued; + init_waitqueue_head(&req->wq); + + /* Save the caller req_id and restore it later in the reply */ + req->caller_req_id = req->msg.req_id; + req->msg.req_id = xs_request_enter(req); + + mutex_lock(&xb_write_mutex); + list_add_tail(&req->list, &xb_write_list); + notify = list_is_singular(&xb_write_list); + mutex_unlock(&xb_write_mutex); + + if (notify) + wake_up(&xb_waitq); +} + +static void *xs_wait_for_reply(struct xb_req_data *req, struct xsd_sockmsg *msg) +{ + void *ret; + + ret = read_reply(req); + + xs_request_exit(req); + + msg->type = req->msg.type; + msg->len = req->msg.len; + + mutex_lock(&xb_write_mutex); + if (req->state == xb_req_state_queued || + req->state == xb_req_state_wait_reply) + req->state = xb_req_state_aborted; + else + kfree(req); + mutex_unlock(&xb_write_mutex); + + return ret; +} + +static void xs_wake_up(struct xb_req_data *req) +{ + wake_up(&req->wq); +} + +int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par) +{ + struct xb_req_data *req; + struct kvec *vec; + + req = kmalloc(sizeof(*req) + sizeof(*vec), GFP_KERNEL); + if (!req) + return -ENOMEM; + + vec = (struct kvec *)(req + 1); + vec->iov_len = msg->len; + vec->iov_base = msg + 1; + + req->vec = vec; + req->num_vecs = 1; + req->cb = xenbus_dev_queue_reply; + req->par = par; + req->user_req = true; + + xs_send(req, msg); + + return 0; +} +EXPORT_SYMBOL(xenbus_dev_request_and_reply); + +/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ +static void *xs_talkv(struct xenbus_transaction t, + enum xsd_sockmsg_type type, + const struct kvec *iovec, + unsigned int num_vecs, + unsigned int *len) +{ + struct xb_req_data *req; + struct xsd_sockmsg msg; + void *ret = NULL; + unsigned int i; + int err; + + req = kmalloc(sizeof(*req), GFP_NOIO | __GFP_HIGH); + if (!req) + return ERR_PTR(-ENOMEM); + + req->vec = iovec; + req->num_vecs = num_vecs; + req->cb = xs_wake_up; + req->user_req = false; + + msg.req_id = 0; + msg.tx_id = t.id; + msg.type = type; + msg.len = 0; + for (i = 0; i < num_vecs; i++) + msg.len += iovec[i].iov_len; + + xs_send(req, &msg); + + ret = xs_wait_for_reply(req, &msg); + if (len) + *len = msg.len; + + if (IS_ERR(ret)) + return ret; + + if (msg.type == XS_ERROR) { + err = get_error(ret); + kfree(ret); + return ERR_PTR(-err); + } + + if (msg.type != type) { + pr_warn_ratelimited("unexpected type [%d], expected [%d]\n", + msg.type, type); + kfree(ret); + return ERR_PTR(-EINVAL); + } + return ret; +} + +/* Simplified version of xs_talkv: single message. */ +static void *xs_single(struct xenbus_transaction t, + enum xsd_sockmsg_type type, + const char *string, + unsigned int *len) +{ + struct kvec iovec; + + iovec.iov_base = (void *)string; + iovec.iov_len = strlen(string) + 1; + return xs_talkv(t, type, &iovec, 1, len); +} + +/* Many commands only need an ack, don't care what it says. */ +static int xs_error(char *reply) +{ + if (IS_ERR(reply)) + return PTR_ERR(reply); + kfree(reply); + return 0; +} + +static unsigned int count_strings(const char *strings, unsigned int len) +{ + unsigned int num; + const char *p; + + for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) + num++; + + return num; +} + +/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ +static char *join(const char *dir, const char *name) +{ + char *buffer; + + if (strlen(name) == 0) + buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s", dir); + else + buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", dir, name); + return (!buffer) ? ERR_PTR(-ENOMEM) : buffer; +} + +static char **split(char *strings, unsigned int len, unsigned int *num) +{ + char *p, **ret; + + /* Count the strings. */ + *num = count_strings(strings, len); + + /* Transfer to one big alloc for easy freeing. */ + ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH); + if (!ret) { + kfree(strings); + return ERR_PTR(-ENOMEM); + } + memcpy(&ret[*num], strings, len); + kfree(strings); + + strings = (char *)&ret[*num]; + for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) + ret[(*num)++] = p; + + return ret; +} + +char **xenbus_directory(struct xenbus_transaction t, + const char *dir, const char *node, unsigned int *num) +{ + char *strings, *path; + unsigned int len; + + path = join(dir, node); + if (IS_ERR(path)) + return (char **)path; + + strings = xs_single(t, XS_DIRECTORY, path, &len); + kfree(path); + if (IS_ERR(strings)) + return (char **)strings; + + return split(strings, len, num); +} +EXPORT_SYMBOL_GPL(xenbus_directory); + +/* Check if a path exists. Return 1 if it does. */ +int xenbus_exists(struct xenbus_transaction t, + const char *dir, const char *node) +{ + char **d; + int dir_n; + + d = xenbus_directory(t, dir, node, &dir_n); + if (IS_ERR(d)) + return 0; + kfree(d); + return 1; +} +EXPORT_SYMBOL_GPL(xenbus_exists); + +/* Get the value of a single file. + * Returns a kmalloced value: call free() on it after use. + * len indicates length in bytes. + */ +void *xenbus_read(struct xenbus_transaction t, + const char *dir, const char *node, unsigned int *len) +{ + char *path; + void *ret; + + path = join(dir, node); + if (IS_ERR(path)) + return (void *)path; + + ret = xs_single(t, XS_READ, path, len); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_read); + +/* Write the value of a single file. + * Returns -err on failure. + */ +int xenbus_write(struct xenbus_transaction t, + const char *dir, const char *node, const char *string) +{ + const char *path; + struct kvec iovec[2]; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + iovec[0].iov_base = (void *)path; + iovec[0].iov_len = strlen(path) + 1; + iovec[1].iov_base = (void *)string; + iovec[1].iov_len = strlen(string); + + ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_write); + +/* Create a new directory. */ +int xenbus_mkdir(struct xenbus_transaction t, + const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = xs_error(xs_single(t, XS_MKDIR, path, NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_mkdir); + +/* Destroy a file or directory (directories must be empty). */ +int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = xs_error(xs_single(t, XS_RM, path, NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_rm); + +/* Start a transaction: changes by others will not be seen during this + * transaction, and changes will not be visible to others until end. + */ +int xenbus_transaction_start(struct xenbus_transaction *t) +{ + char *id_str; + + id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL); + if (IS_ERR(id_str)) + return PTR_ERR(id_str); + + t->id = simple_strtoul(id_str, NULL, 0); + kfree(id_str); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_transaction_start); + +/* End a transaction. + * If abandon is true, transaction is discarded instead of committed. + */ +int xenbus_transaction_end(struct xenbus_transaction t, int abort) +{ + char abortstr[2]; + + if (abort) + strcpy(abortstr, "F"); + else + strcpy(abortstr, "T"); + + return xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); +} +EXPORT_SYMBOL_GPL(xenbus_transaction_end); + +/* Single read and scanf: returns -errno or num scanned. */ +int xenbus_scanf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + char *val; + + val = xenbus_read(t, dir, node, NULL); + if (IS_ERR(val)) + return PTR_ERR(val); + + va_start(ap, fmt); + ret = vsscanf(val, fmt, ap); + va_end(ap); + kfree(val); + /* Distinctive errno. */ + if (ret == 0) + return -ERANGE; + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_scanf); + +/* Read an (optional) unsigned value. */ +unsigned int xenbus_read_unsigned(const char *dir, const char *node, + unsigned int default_val) +{ + unsigned int val; + int ret; + + ret = xenbus_scanf(XBT_NIL, dir, node, "%u", &val); + if (ret <= 0) + val = default_val; + + return val; +} +EXPORT_SYMBOL_GPL(xenbus_read_unsigned); + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + char *buf; + + va_start(ap, fmt); + buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap); + va_end(ap); + + if (!buf) + return -ENOMEM; + + ret = xenbus_write(t, dir, node, buf); + + kfree(buf); + + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_printf); + +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ +int xenbus_gather(struct xenbus_transaction t, const char *dir, ...) +{ + va_list ap; + const char *name; + int ret = 0; + + va_start(ap, dir); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + p = xenbus_read(t, dir, name, NULL); + if (IS_ERR(p)) { + ret = PTR_ERR(p); + break; + } + if (fmt) { + if (sscanf(p, fmt, result) == 0) + ret = -EINVAL; + kfree(p); + } else + *(char **)result = p; + } + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_gather); + +static int xs_watch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = (void *)path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (void *)token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static int xs_unwatch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = (char *)path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (char *)token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static struct xenbus_watch *find_watch(const char *token) +{ + struct xenbus_watch *i, *cmp; + + cmp = (void *)simple_strtoul(token, NULL, 16); + + list_for_each_entry(i, &watches, list) + if (i == cmp) + return i; + + return NULL; +} + +int xs_watch_msg(struct xs_watch_event *event) +{ + if (count_strings(event->body, event->len) != 2) { + kfree(event); + return -EINVAL; + } + event->path = (const char *)event->body; + event->token = (const char *)strchr(event->body, '\0') + 1; + + spin_lock(&watches_lock); + event->handle = find_watch(event->token); + if (event->handle != NULL && + (!event->handle->will_handle || + event->handle->will_handle(event->handle, + event->path, event->token))) { + spin_lock(&watch_events_lock); + list_add_tail(&event->list, &watch_events); + event->handle->nr_pending++; + wake_up(&watch_events_waitq); + spin_unlock(&watch_events_lock); + } else + kfree(event); + spin_unlock(&watches_lock); + + return 0; +} + +/* + * Certain older XenBus toolstack cannot handle reading values that are + * not populated. Some Xen 3.4 installation are incapable of doing this + * so if we are running on anything older than 4 do not attempt to read + * control/platform-feature-xs_reset_watches. + */ +static bool xen_strict_xenbus_quirk(void) +{ +#ifdef CONFIG_X86 + uint32_t eax, ebx, ecx, edx, base; + + base = xen_cpuid_base(); + cpuid(base + 1, &eax, &ebx, &ecx, &edx); + + if ((eax >> 16) < 4) + return true; +#endif + return false; + +} +static void xs_reset_watches(void) +{ + int err; + + if (!xen_hvm_domain() || xen_initial_domain()) + return; + + if (xen_strict_xenbus_quirk()) + return; + + if (!xenbus_read_unsigned("control", + "platform-feature-xs_reset_watches", 0)) + return; + + err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL)); + if (err && err != -EEXIST) + pr_warn("xs_reset_watches failed: %d\n", err); +} + +/* Register callback to watch this node. */ +int register_xenbus_watch(struct xenbus_watch *watch) +{ + /* Pointer in ascii is the token. */ + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + + watch->nr_pending = 0; + + down_read(&xs_watch_rwsem); + + spin_lock(&watches_lock); + BUG_ON(find_watch(token)); + list_add(&watch->list, &watches); + spin_unlock(&watches_lock); + + err = xs_watch(watch->node, token); + + if (err) { + spin_lock(&watches_lock); + list_del(&watch->list); + spin_unlock(&watches_lock); + } + + up_read(&xs_watch_rwsem); + + return err; +} +EXPORT_SYMBOL_GPL(register_xenbus_watch); + +void unregister_xenbus_watch(struct xenbus_watch *watch) +{ + struct xs_watch_event *event, *tmp; + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + + down_read(&xs_watch_rwsem); + + spin_lock(&watches_lock); + BUG_ON(!find_watch(token)); + list_del(&watch->list); + spin_unlock(&watches_lock); + + err = xs_unwatch(watch->node, token); + if (err) + pr_warn("Failed to release watch %s: %i\n", watch->node, err); + + up_read(&xs_watch_rwsem); + + /* Make sure there are no callbacks running currently (unless + its us) */ + if (current->pid != xenwatch_pid) + mutex_lock(&xenwatch_mutex); + + /* Cancel pending watch events. */ + spin_lock(&watch_events_lock); + if (watch->nr_pending) { + list_for_each_entry_safe(event, tmp, &watch_events, list) { + if (event->handle != watch) + continue; + list_del(&event->list); + kfree(event); + } + watch->nr_pending = 0; + } + spin_unlock(&watch_events_lock); + + if (current->pid != xenwatch_pid) + mutex_unlock(&xenwatch_mutex); +} +EXPORT_SYMBOL_GPL(unregister_xenbus_watch); + +void xs_suspend(void) +{ + xs_suspend_enter(); + + down_write(&xs_watch_rwsem); + mutex_lock(&xs_response_mutex); +} + +void xs_resume(void) +{ + struct xenbus_watch *watch; + char token[sizeof(watch) * 2 + 1]; + + xb_init_comms(); + + mutex_unlock(&xs_response_mutex); + + xs_suspend_exit(); + + /* No need for watches_lock: the xs_watch_rwsem is sufficient. */ + list_for_each_entry(watch, &watches, list) { + sprintf(token, "%lX", (long)watch); + xs_watch(watch->node, token); + } + + up_write(&xs_watch_rwsem); +} + +void xs_suspend_cancel(void) +{ + mutex_unlock(&xs_response_mutex); + up_write(&xs_watch_rwsem); + + xs_suspend_exit(); +} + +static int xenwatch_thread(void *unused) +{ + struct xs_watch_event *event; + + xenwatch_pid = current->pid; + + for (;;) { + wait_event_interruptible(watch_events_waitq, + !list_empty(&watch_events)); + + if (kthread_should_stop()) + break; + + mutex_lock(&xenwatch_mutex); + + spin_lock(&watch_events_lock); + event = list_first_entry_or_null(&watch_events, + struct xs_watch_event, list); + if (event) { + list_del(&event->list); + event->handle->nr_pending--; + } + spin_unlock(&watch_events_lock); + + if (event) { + event->handle->callback(event->handle, event->path, + event->token); + kfree(event); + } + + mutex_unlock(&xenwatch_mutex); + } + + return 0; +} + +/* + * Wake up all threads waiting for a xenstore reply. In case of shutdown all + * pending replies will be marked as "aborted" in order to let the waiters + * return in spite of xenstore possibly no longer being able to reply. This + * will avoid blocking shutdown by a thread waiting for xenstore but being + * necessary for shutdown processing to proceed. + */ +static int xs_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + struct xb_req_data *req; + + mutex_lock(&xb_write_mutex); + list_for_each_entry(req, &xs_reply_list, list) + wake_up(&req->wq); + list_for_each_entry(req, &xb_write_list, list) + wake_up(&req->wq); + mutex_unlock(&xb_write_mutex); + return NOTIFY_DONE; +} + +static struct notifier_block xs_reboot_nb = { + .notifier_call = xs_reboot_notify, +}; + +int xs_init(void) +{ + int err; + struct task_struct *task; + + register_reboot_notifier(&xs_reboot_nb); + + /* Initialize the shared memory rings to talk to xenstored */ + err = xb_init_comms(); + if (err) + return err; + + task = kthread_run(xenwatch_thread, NULL, "xenwatch"); + if (IS_ERR(task)) + return PTR_ERR(task); + + /* shutdown watches for kexec boot */ + xs_reset_watches(); + + return 0; +} |