10 files changed, 5303 insertions, 0 deletions
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile
new file mode 100644
index 000000000..0c7532110
--- /dev/null
+++ b/drivers/xen/xenbus/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y	+= xenbus.o
+obj-y	+= xenbus_dev_frontend.o
+
+xenbus-objs =
+xenbus-objs += xenbus_client.o
+xenbus-objs += xenbus_comms.o
+xenbus-objs += xenbus_xs.o
+xenbus-objs += xenbus_probe.o
+
+xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
+xenbus-objs += $(xenbus-be-objs-y)
+
+obj-$(CONFIG_XEN_BACKEND) += xenbus_dev_backend.o
+obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h
new file mode 100644
index 000000000..2754bdfad
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus.h
@@ -0,0 +1,139 @@
+/*
+ * Private include for xenbus communications.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 XenSource Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XENBUS_XENBUS_H
+#define _XENBUS_XENBUS_H
+
+#include <linux/mutex.h>
+#include <linux/uio.h>
+#include <xen/xenbus.h>
+
+#define XEN_BUS_ID_SIZE			20
+
+struct xen_bus_type {
+	char *root;
+	unsigned int levels;
+	int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
+	int (*probe)(struct xen_bus_type *bus, const char *type,
+		     const char *dir);
+	bool (*otherend_will_handle)(struct xenbus_watch *watch,
+				     const char *path, const char *token);
+	void (*otherend_changed)(struct xenbus_watch *watch, const char *path,
+				 const char *token);
+	struct bus_type bus;
+};
+
+enum xenstore_init {
+	XS_UNKNOWN,
+	XS_PV,
+	XS_HVM,
+	XS_LOCAL,
+};
+
+struct xs_watch_event {
+	struct list_head list;
+	unsigned int len;
+	struct xenbus_watch *handle;
+	const char *path;
+	const char *token;
+	char body[];
+};
+
+enum xb_req_state {
+	xb_req_state_queued,
+	xb_req_state_wait_reply,
+	xb_req_state_got_reply,
+	xb_req_state_aborted
+};
+
+struct xb_req_data {
+	struct list_head list;
+	wait_queue_head_t wq;
+	struct xsd_sockmsg msg;
+	uint32_t caller_req_id;
+	enum xsd_sockmsg_type type;
+	char *body;
+	const struct kvec *vec;
+	int num_vecs;
+	int err;
+	enum xb_req_state state;
+	bool user_req;
+	void (*cb)(struct xb_req_data *);
+	void *par;
+};
+
+extern enum xenstore_init xen_store_domain_type;
+extern const struct attribute_group *xenbus_dev_groups[];
+extern struct mutex xs_response_mutex;
+extern struct list_head xs_reply_list;
+extern struct list_head xb_write_list;
+extern wait_queue_head_t xb_waitq;
+extern struct mutex xb_write_mutex;
+
+int xs_init(void);
+int xb_init_comms(void);
+void xb_deinit_comms(void);
+int xs_watch_msg(struct xs_watch_event *event);
+void xs_request_exit(struct xb_req_data *req);
+
+int xenbus_match(struct device *_dev, struct device_driver *_drv);
+int xenbus_dev_probe(struct device *_dev);
+void xenbus_dev_remove(struct device *_dev);
+int xenbus_register_driver_common(struct xenbus_driver *drv,
+				  struct xen_bus_type *bus,
+				  struct module *owner,
+				  const char *mod_name);
+int xenbus_probe_node(struct xen_bus_type *bus,
+		      const char *type,
+		      const char *nodename);
+int xenbus_probe_devices(struct xen_bus_type *bus);
+
+void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
+
+int xenbus_dev_suspend(struct device *dev);
+int xenbus_dev_resume(struct device *dev);
+int xenbus_dev_cancel(struct device *dev);
+
+void xenbus_otherend_changed(struct xenbus_watch *watch,
+			     const char *path, const char *token,
+			     int ignore_on_shutdown);
+
+int xenbus_read_otherend_details(struct xenbus_device *xendev,
+				 char *id_node, char *path_node);
+
+void xenbus_ring_ops_init(void);
+
+int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par);
+void xenbus_dev_queue_reply(struct xb_req_data *req);
+
+extern unsigned int xb_dev_generation_id;
+
+#endif
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
new file mode 100644
index 000000000..d4b251925
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -0,0 +1,949 @@
+/******************************************************************************
+ * Client-facing interface for the Xenbus driver.  In other words, the
+ * interface between the Xenbus and the device-specific code, be it the
+ * frontend or the backend of that driver.
+ *
+ * Copyright (C) 2005 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/export.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/page.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+#include <xen/balloon.h>
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+#include <xen/xen.h>
+#include <xen/features.h>
+
+#include "xenbus.h"
+
+#define XENBUS_PAGES(_grants)	(DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE))
+
+#define XENBUS_MAX_RING_PAGES	(XENBUS_PAGES(XENBUS_MAX_RING_GRANTS))
+
+struct xenbus_map_node {
+	struct list_head next;
+	union {
+		struct {
+			struct vm_struct *area;
+		} pv;
+		struct {
+			struct page *pages[XENBUS_MAX_RING_PAGES];
+			unsigned long addrs[XENBUS_MAX_RING_GRANTS];
+			void *addr;
+		} hvm;
+	};
+	grant_handle_t handles[XENBUS_MAX_RING_GRANTS];
+	unsigned int   nr_handles;
+};
+
+struct map_ring_valloc {
+	struct xenbus_map_node *node;
+
+	/* Why do we need two arrays? See comment of __xenbus_map_ring */
+	unsigned long addrs[XENBUS_MAX_RING_GRANTS];
+	phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
+
+	struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
+	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
+
+	unsigned int idx;
+};
+
+static DEFINE_SPINLOCK(xenbus_valloc_lock);
+static LIST_HEAD(xenbus_valloc_pages);
+
+struct xenbus_ring_ops {
+	int (*map)(struct xenbus_device *dev, struct map_ring_valloc *info,
+		   grant_ref_t *gnt_refs, unsigned int nr_grefs,
+		   void **vaddr);
+	int (*unmap)(struct xenbus_device *dev, void *vaddr);
+};
+
+static const struct xenbus_ring_ops *ring_ops __read_mostly;
+
+const char *xenbus_strstate(enum xenbus_state state)
+{
+	static const char *const name[] = {
+		[ XenbusStateUnknown      ] = "Unknown",
+		[ XenbusStateInitialising ] = "Initialising",
+		[ XenbusStateInitWait     ] = "InitWait",
+		[ XenbusStateInitialised  ] = "Initialised",
+		[ XenbusStateConnected    ] = "Connected",
+		[ XenbusStateClosing      ] = "Closing",
+		[ XenbusStateClosed	  ] = "Closed",
+		[XenbusStateReconfiguring] = "Reconfiguring",
+		[XenbusStateReconfigured] = "Reconfigured",
+	};
+	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
+}
+EXPORT_SYMBOL_GPL(xenbus_strstate);
+
+/**
+ * xenbus_watch_path - register a watch
+ * @dev: xenbus device
+ * @path: path to watch
+ * @watch: watch to register
+ * @callback: callback to register
+ *
+ * Register a @watch on the given path, using the given xenbus_watch structure
+ * for storage, and the given @callback function as the callback.  Return 0 on
+ * success, or -errno on error.  On success, the given @path will be saved as
+ * @watch->node, and remains the caller's to free.  On error, @watch->node will
+ * be NULL, the device will switch to %XenbusStateClosing, and the error will
+ * be saved in the store.
+ */
+int xenbus_watch_path(struct xenbus_device *dev, const char *path,
+		      struct xenbus_watch *watch,
+		      bool (*will_handle)(struct xenbus_watch *,
+					  const char *, const char *),
+		      void (*callback)(struct xenbus_watch *,
+				       const char *, const char *))
+{
+	int err;
+
+	watch->node = path;
+	watch->will_handle = will_handle;
+	watch->callback = callback;
+
+	err = register_xenbus_watch(watch);
+
+	if (err) {
+		watch->node = NULL;
+		watch->will_handle = NULL;
+		watch->callback = NULL;
+		xenbus_dev_fatal(dev, err, "adding watch on %s", path);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_watch_path);
+
+
+/**
+ * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
+ * @dev: xenbus device
+ * @watch: watch to register
+ * @callback: callback to register
+ * @pathfmt: format of path to watch
+ *
+ * Register a watch on the given @path, using the given xenbus_watch
+ * structure for storage, and the given @callback function as the callback.
+ * Return 0 on success, or -errno on error.  On success, the watched path
+ * (@path/@path2) will be saved as @watch->node, and becomes the caller's to
+ * kfree().  On error, watch->node will be NULL, so the caller has nothing to
+ * free, the device will switch to %XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_watch_pathfmt(struct xenbus_device *dev,
+			 struct xenbus_watch *watch,
+			 bool (*will_handle)(struct xenbus_watch *,
+					const char *, const char *),
+			 void (*callback)(struct xenbus_watch *,
+					  const char *, const char *),
+			 const char *pathfmt, ...)
+{
+	int err;
+	va_list ap;
+	char *path;
+
+	va_start(ap, pathfmt);
+	path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
+	va_end(ap);
+
+	if (!path) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
+		return -ENOMEM;
+	}
+	err = xenbus_watch_path(dev, path, watch, will_handle, callback);
+
+	if (err)
+		kfree(path);
+	return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
+
+static void xenbus_switch_fatal(struct xenbus_device *, int, int,
+				const char *, ...);
+
+static int
+__xenbus_switch_state(struct xenbus_device *dev,
+		      enum xenbus_state state, int depth)
+{
+	/* We check whether the state is currently set to the given value, and
+	   if not, then the state is set.  We don't want to unconditionally
+	   write the given state, because we don't want to fire watches
+	   unnecessarily.  Furthermore, if the node has gone, we don't write
+	   to it, as the device will be tearing down, and we don't want to
+	   resurrect that directory.
+
+	   Note that, because of this cached value of our state, this
+	   function will not take a caller's Xenstore transaction
+	   (something it was trying to in the past) because dev->state
+	   would not get reset if the transaction was aborted.
+	 */
+
+	struct xenbus_transaction xbt;
+	int current_state;
+	int err, abort;
+
+	if (state == dev->state)
+		return 0;
+
+again:
+	abort = 1;
+
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		xenbus_switch_fatal(dev, depth, err, "starting transaction");
+		return 0;
+	}
+
+	err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
+	if (err != 1)
+		goto abort;
+
+	err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
+	if (err) {
+		xenbus_switch_fatal(dev, depth, err, "writing new state");
+		goto abort;
+	}
+
+	abort = 0;
+abort:
+	err = xenbus_transaction_end(xbt, abort);
+	if (err) {
+		if (err == -EAGAIN && !abort)
+			goto again;
+		xenbus_switch_fatal(dev, depth, err, "ending transaction");
+	} else
+		dev->state = state;
+
+	return 0;
+}
+
+/**
+ * xenbus_switch_state
+ * @dev: xenbus device
+ * @state: new state
+ *
+ * Advertise in the store a change of the given driver to the given new_state.
+ * Return 0 on success, or -errno on error.  On error, the device will switch
+ * to XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
+{
+	return __xenbus_switch_state(dev, state, 0);
+}
+
+EXPORT_SYMBOL_GPL(xenbus_switch_state);
+
+int xenbus_frontend_closed(struct xenbus_device *dev)
+{
+	xenbus_switch_state(dev, XenbusStateClosed);
+	complete(&dev->down);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
+
+static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
+				const char *fmt, va_list ap)
+{
+	unsigned int len;
+	char *printf_buffer;
+	char *path_buffer;
+
+#define PRINTF_BUFFER_SIZE 4096
+
+	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
+	if (!printf_buffer)
+		return;
+
+	len = sprintf(printf_buffer, "%i ", -err);
+	vsnprintf(printf_buffer + len, PRINTF_BUFFER_SIZE - len, fmt, ap);
+
+	dev_err(&dev->dev, "%s\n", printf_buffer);
+
+	path_buffer = kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
+	if (path_buffer)
+		xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer);
+
+	kfree(printf_buffer);
+	kfree(path_buffer);
+}
+
+/**
+ * xenbus_dev_error
+ * @dev: xenbus device
+ * @err: error to report
+ * @fmt: error message format
+ *
+ * Report the given negative errno into the store, along with the given
+ * formatted message.
+ */
+void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	xenbus_va_dev_error(dev, err, fmt, ap);
+	va_end(ap);
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_error);
+
+/**
+ * xenbus_dev_fatal
+ * @dev: xenbus device
+ * @err: error to report
+ * @fmt: error message format
+ *
+ * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
+ * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
+ * closedown of this driver and its peer.
+ */
+
+void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	xenbus_va_dev_error(dev, err, fmt, ap);
+	va_end(ap);
+
+	xenbus_switch_state(dev, XenbusStateClosing);
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
+
+/**
+ * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
+ * avoiding recursion within xenbus_switch_state.
+ */
+static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
+				const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	xenbus_va_dev_error(dev, err, fmt, ap);
+	va_end(ap);
+
+	if (!depth)
+		__xenbus_switch_state(dev, XenbusStateClosing, 1);
+}
+
+/*
+ * xenbus_setup_ring
+ * @dev: xenbus device
+ * @vaddr: pointer to starting virtual address of the ring
+ * @nr_pages: number of pages to be granted
+ * @grefs: grant reference array to be filled in
+ *
+ * Allocate physically contiguous pages for a shared ring buffer and grant it
+ * to the peer of the given device. The ring buffer is initially filled with
+ * zeroes. The virtual address of the ring is stored at @vaddr and the
+ * grant references are stored in the @grefs array. In case of error @vaddr
+ * will be set to NULL and @grefs will be filled with INVALID_GRANT_REF.
+ */
+int xenbus_setup_ring(struct xenbus_device *dev, gfp_t gfp, void **vaddr,
+		      unsigned int nr_pages, grant_ref_t *grefs)
+{
+	unsigned long ring_size = nr_pages * XEN_PAGE_SIZE;
+	grant_ref_t gref_head;
+	unsigned int i;
+	void *addr;
+	int ret;
+
+	addr = *vaddr = alloc_pages_exact(ring_size, gfp | __GFP_ZERO);
+	if (!*vaddr) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = gnttab_alloc_grant_references(nr_pages, &gref_head);
+	if (ret) {
+		xenbus_dev_fatal(dev, ret, "granting access to %u ring pages",
+				 nr_pages);
+		goto err;
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		unsigned long gfn;
+
+		if (is_vmalloc_addr(*vaddr))
+			gfn = pfn_to_gfn(vmalloc_to_pfn(addr));
+		else
+			gfn = virt_to_gfn(addr);
+
+		grefs[i] = gnttab_claim_grant_reference(&gref_head);
+		gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id,
+						gfn, 0);
+
+		addr += XEN_PAGE_SIZE;
+	}
+
+	return 0;
+
+ err:
+	if (*vaddr)
+		free_pages_exact(*vaddr, ring_size);
+	for (i = 0; i < nr_pages; i++)
+		grefs[i] = INVALID_GRANT_REF;
+	*vaddr = NULL;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_setup_ring);
+
+/*
+ * xenbus_teardown_ring
+ * @vaddr: starting virtual address of the ring
+ * @nr_pages: number of pages
+ * @grefs: grant reference array
+ *
+ * Remove grants for the shared ring buffer and free the associated memory.
+ * On return the grant reference array is filled with INVALID_GRANT_REF.
+ */
+void xenbus_teardown_ring(void **vaddr, unsigned int nr_pages,
+			  grant_ref_t *grefs)
+{
+	unsigned int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		if (grefs[i] != INVALID_GRANT_REF) {
+			gnttab_end_foreign_access(grefs[i], NULL);
+			grefs[i] = INVALID_GRANT_REF;
+		}
+	}
+
+	if (*vaddr)
+		free_pages_exact(*vaddr, nr_pages * XEN_PAGE_SIZE);
+	*vaddr = NULL;
+}
+EXPORT_SYMBOL_GPL(xenbus_teardown_ring);
+
+/**
+ * Allocate an event channel for the given xenbus_device, assigning the newly
+ * created local port to *port.  Return 0 on success, or -errno on error.  On
+ * error, the device will switch to XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port)
+{
+	struct evtchn_alloc_unbound alloc_unbound;
+	int err;
+
+	alloc_unbound.dom = DOMID_SELF;
+	alloc_unbound.remote_dom = dev->otherend_id;
+
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+					  &alloc_unbound);
+	if (err)
+		xenbus_dev_fatal(dev, err, "allocating event channel");
+	else
+		*port = alloc_unbound.port;
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
+
+
+/**
+ * Free an existing event channel. Returns 0 on success or -errno on error.
+ */
+int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port)
+{
+	struct evtchn_close close;
+	int err;
+
+	close.port = port;
+
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+	if (err)
+		xenbus_dev_error(dev, err, "freeing event channel %u", port);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
+
+
+/**
+ * xenbus_map_ring_valloc
+ * @dev: xenbus device
+ * @gnt_refs: grant reference array
+ * @nr_grefs: number of grant references
+ * @vaddr: pointer to address to be filled out by mapping
+ *
+ * Map @nr_grefs pages of memory into this domain from another
+ * domain's grant table.  xenbus_map_ring_valloc allocates @nr_grefs
+ * pages of virtual address space, maps the pages to that address, and
+ * sets *vaddr to that address.  Returns 0 on success, and -errno on
+ * error. If an error is returned, device will switch to
+ * XenbusStateClosing and the error message will be saved in XenStore.
+ */
+int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
+			   unsigned int nr_grefs, void **vaddr)
+{
+	int err;
+	struct map_ring_valloc *info;
+
+	*vaddr = NULL;
+
+	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
+		return -EINVAL;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->node = kzalloc(sizeof(*info->node), GFP_KERNEL);
+	if (!info->node)
+		err = -ENOMEM;
+	else
+		err = ring_ops->map(dev, info, gnt_refs, nr_grefs, vaddr);
+
+	kfree(info->node);
+	kfree(info);
+	return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
+
+/* N.B. sizeof(phys_addr_t) doesn't always equal to sizeof(unsigned
+ * long), e.g. 32-on-64.  Caller is responsible for preparing the
+ * right array to feed into this function */
+static int __xenbus_map_ring(struct xenbus_device *dev,
+			     grant_ref_t *gnt_refs,
+			     unsigned int nr_grefs,
+			     grant_handle_t *handles,
+			     struct map_ring_valloc *info,
+			     unsigned int flags,
+			     bool *leaked)
+{
+	int i, j;
+
+	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
+		return -EINVAL;
+
+	for (i = 0; i < nr_grefs; i++) {
+		gnttab_set_map_op(&info->map[i], info->phys_addrs[i], flags,
+				  gnt_refs[i], dev->otherend_id);
+		handles[i] = INVALID_GRANT_HANDLE;
+	}
+
+	gnttab_batch_map(info->map, i);
+
+	for (i = 0; i < nr_grefs; i++) {
+		if (info->map[i].status != GNTST_okay) {
+			xenbus_dev_fatal(dev, info->map[i].status,
+					 "mapping in shared page %d from domain %d",
+					 gnt_refs[i], dev->otherend_id);
+			goto fail;
+		} else
+			handles[i] = info->map[i].handle;
+	}
+
+	return 0;
+
+ fail:
+	for (i = j = 0; i < nr_grefs; i++) {
+		if (handles[i] != INVALID_GRANT_HANDLE) {
+			gnttab_set_unmap_op(&info->unmap[j],
+					    info->phys_addrs[i],
+					    GNTMAP_host_map, handles[i]);
+			j++;
+		}
+	}
+
+	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j));
+
+	*leaked = false;
+	for (i = 0; i < j; i++) {
+		if (info->unmap[i].status != GNTST_okay) {
+			*leaked = true;
+			break;
+		}
+	}
+
+	return -ENOENT;
+}
+
+/**
+ * xenbus_unmap_ring
+ * @dev: xenbus device
+ * @handles: grant handle array
+ * @nr_handles: number of handles in the array
+ * @vaddrs: addresses to unmap
+ *
+ * Unmap memory in this domain that was imported from another domain.
+ * Returns 0 on success and returns GNTST_* on error
+ * (see xen/include/interface/grant_table.h).
+ */
+static int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles,
+			     unsigned int nr_handles, unsigned long *vaddrs)
+{
+	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
+	int i;
+	int err;
+
+	if (nr_handles > XENBUS_MAX_RING_GRANTS)
+		return -EINVAL;
+
+	for (i = 0; i < nr_handles; i++)
+		gnttab_set_unmap_op(&unmap[i], vaddrs[i],
+				    GNTMAP_host_map, handles[i]);
+
+	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
+
+	err = GNTST_okay;
+	for (i = 0; i < nr_handles; i++) {
+		if (unmap[i].status != GNTST_okay) {
+			xenbus_dev_error(dev, unmap[i].status,
+					 "unmapping page at handle %d error %d",
+					 handles[i], unmap[i].status);
+			err = unmap[i].status;
+			break;
+		}
+	}
+
+	return err;
+}
+
+static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn,
+					    unsigned int goffset,
+					    unsigned int len,
+					    void *data)
+{
+	struct map_ring_valloc *info = data;
+	unsigned long vaddr = (unsigned long)gfn_to_virt(gfn);
+
+	info->phys_addrs[info->idx] = vaddr;
+	info->addrs[info->idx] = vaddr;
+
+	info->idx++;
+}
+
+static int xenbus_map_ring_hvm(struct xenbus_device *dev,
+			       struct map_ring_valloc *info,
+			       grant_ref_t *gnt_ref,
+			       unsigned int nr_grefs,
+			       void **vaddr)
+{
+	struct xenbus_map_node *node = info->node;
+	int err;
+	void *addr;
+	bool leaked = false;
+	unsigned int nr_pages = XENBUS_PAGES(nr_grefs);
+
+	err = xen_alloc_unpopulated_pages(nr_pages, node->hvm.pages);
+	if (err)
+		goto out_err;
+
+	gnttab_foreach_grant(node->hvm.pages, nr_grefs,
+			     xenbus_map_ring_setup_grant_hvm,
+			     info);
+
+	err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles,
+				info, GNTMAP_host_map, &leaked);
+	node->nr_handles = nr_grefs;
+
+	if (err)
+		goto out_free_ballooned_pages;
+
+	addr = vmap(node->hvm.pages, nr_pages, VM_MAP | VM_IOREMAP,
+		    PAGE_KERNEL);
+	if (!addr) {
+		err = -ENOMEM;
+		goto out_xenbus_unmap_ring;
+	}
+
+	node->hvm.addr = addr;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_add(&node->next, &xenbus_valloc_pages);
+	spin_unlock(&xenbus_valloc_lock);
+
+	*vaddr = addr;
+	info->node = NULL;
+
+	return 0;
+
+ out_xenbus_unmap_ring:
+	if (!leaked)
+		xenbus_unmap_ring(dev, node->handles, nr_grefs, info->addrs);
+	else
+		pr_alert("leaking %p size %u page(s)",
+			 addr, nr_pages);
+ out_free_ballooned_pages:
+	if (!leaked)
+		xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
+ out_err:
+	return err;
+}
+
+/**
+ * xenbus_unmap_ring_vfree
+ * @dev: xenbus device
+ * @vaddr: addr to unmap
+ *
+ * Based on Rusty Russell's skeleton driver's unmap_page.
+ * Unmap a page of memory in this domain that was imported from another domain.
+ * Use xenbus_unmap_ring_vfree if you mapped in your memory with
+ * xenbus_map_ring_valloc (it will free the virtual address space).
+ * Returns 0 on success and returns GNTST_* on error
+ * (see xen/include/interface/grant_table.h).
+ */
+int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
+{
+	return ring_ops->unmap(dev, vaddr);
+}
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
+
+#ifdef CONFIG_XEN_PV
+static int map_ring_apply(pte_t *pte, unsigned long addr, void *data)
+{
+	struct map_ring_valloc *info = data;
+
+	info->phys_addrs[info->idx++] = arbitrary_virt_to_machine(pte).maddr;
+	return 0;
+}
+
+static int xenbus_map_ring_pv(struct xenbus_device *dev,
+			      struct map_ring_valloc *info,
+			      grant_ref_t *gnt_refs,
+			      unsigned int nr_grefs,
+			      void **vaddr)
+{
+	struct xenbus_map_node *node = info->node;
+	struct vm_struct *area;
+	bool leaked = false;
+	int err = -ENOMEM;
+
+	area = get_vm_area(XEN_PAGE_SIZE * nr_grefs, VM_IOREMAP);
+	if (!area)
+		return -ENOMEM;
+	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
+				XEN_PAGE_SIZE * nr_grefs, map_ring_apply, info))
+		goto failed;
+	err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
+				info, GNTMAP_host_map | GNTMAP_contains_pte,
+				&leaked);
+	if (err)
+		goto failed;
+
+	node->nr_handles = nr_grefs;
+	node->pv.area = area;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_add(&node->next, &xenbus_valloc_pages);
+	spin_unlock(&xenbus_valloc_lock);
+
+	*vaddr = area->addr;
+	info->node = NULL;
+
+	return 0;
+
+failed:
+	if (!leaked)
+		free_vm_area(area);
+	else
+		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
+
+	return err;
+}
+
+static int xenbus_unmap_ring_pv(struct xenbus_device *dev, void *vaddr)
+{
+	struct xenbus_map_node *node;
+	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
+	unsigned int level;
+	int i;
+	bool leaked = false;
+	int err;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_for_each_entry(node, &xenbus_valloc_pages, next) {
+		if (node->pv.area->addr == vaddr) {
+			list_del(&node->next);
+			goto found;
+		}
+	}
+	node = NULL;
+ found:
+	spin_unlock(&xenbus_valloc_lock);
+
+	if (!node) {
+		xenbus_dev_error(dev, -ENOENT,
+				 "can't find mapped virtual address %p", vaddr);
+		return GNTST_bad_virt_addr;
+	}
+
+	for (i = 0; i < node->nr_handles; i++) {
+		unsigned long addr;
+
+		memset(&unmap[i], 0, sizeof(unmap[i]));
+		addr = (unsigned long)vaddr + (XEN_PAGE_SIZE * i);
+		unmap[i].host_addr = arbitrary_virt_to_machine(
+			lookup_address(addr, &level)).maddr;
+		unmap[i].dev_bus_addr = 0;
+		unmap[i].handle = node->handles[i];
+	}
+
+	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
+
+	err = GNTST_okay;
+	leaked = false;
+	for (i = 0; i < node->nr_handles; i++) {
+		if (unmap[i].status != GNTST_okay) {
+			leaked = true;
+			xenbus_dev_error(dev, unmap[i].status,
+					 "unmapping page at handle %d error %d",
+					 node->handles[i], unmap[i].status);
+			err = unmap[i].status;
+			break;
+		}
+	}
+
+	if (!leaked)
+		free_vm_area(node->pv.area);
+	else
+		pr_alert("leaking VM area %p size %u page(s)",
+			 node->pv.area, node->nr_handles);
+
+	kfree(node);
+	return err;
+}
+
+static const struct xenbus_ring_ops ring_ops_pv = {
+	.map = xenbus_map_ring_pv,
+	.unmap = xenbus_unmap_ring_pv,
+};
+#endif
+
+struct unmap_ring_hvm
+{
+	unsigned int idx;
+	unsigned long addrs[XENBUS_MAX_RING_GRANTS];
+};
+
+static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn,
+					      unsigned int goffset,
+					      unsigned int len,
+					      void *data)
+{
+	struct unmap_ring_hvm *info = data;
+
+	info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn);
+
+	info->idx++;
+}
+
+static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr)
+{
+	int rv;
+	struct xenbus_map_node *node;
+	void *addr;
+	struct unmap_ring_hvm info = {
+		.idx = 0,
+	};
+	unsigned int nr_pages;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_for_each_entry(node, &xenbus_valloc_pages, next) {
+		addr = node->hvm.addr;
+		if (addr == vaddr) {
+			list_del(&node->next);
+			goto found;
+		}
+	}
+	node = addr = NULL;
+ found:
+	spin_unlock(&xenbus_valloc_lock);
+
+	if (!node) {
+		xenbus_dev_error(dev, -ENOENT,
+				 "can't find mapped virtual address %p", vaddr);
+		return GNTST_bad_virt_addr;
+	}
+
+	nr_pages = XENBUS_PAGES(node->nr_handles);
+
+	gnttab_foreach_grant(node->hvm.pages, node->nr_handles,
+			     xenbus_unmap_ring_setup_grant_hvm,
+			     &info);
+
+	rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles,
+			       info.addrs);
+	if (!rv) {
+		vunmap(vaddr);
+		xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
+	}
+	else
+		WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages);
+
+	kfree(node);
+	return rv;
+}
+
+/**
+ * xenbus_read_driver_state
+ * @path: path for driver
+ *
+ * Return the state of the driver rooted at the given store path, or
+ * XenbusStateUnknown if no state can be read.
+ */
+enum xenbus_state xenbus_read_driver_state(const char *path)
+{
+	enum xenbus_state result;
+	int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
+	if (err)
+		result = XenbusStateUnknown;
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
+
+static const struct xenbus_ring_ops ring_ops_hvm = {
+	.map = xenbus_map_ring_hvm,
+	.unmap = xenbus_unmap_ring_hvm,
+};
+
+void __init xenbus_ring_ops_init(void)
+{
+#ifdef CONFIG_XEN_PV
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		ring_ops = &ring_ops_pv;
+	else
+#endif
+		ring_ops = &ring_ops_hvm;
+}
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
new file mode 100644
index 000000000..e5fda0256
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -0,0 +1,484 @@
+/******************************************************************************
+ * xenbus_comms.c
+ *
+ * Low level code to talks to Xen Store: ringbuffer and event channel.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <xen/xenbus.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/page.h>
+#include "xenbus.h"
+
+/* A list of replies. Currently only one will ever be outstanding. */
+LIST_HEAD(xs_reply_list);
+
+/* A list of write requests. */
+LIST_HEAD(xb_write_list);
+DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
+DEFINE_MUTEX(xb_write_mutex);
+
+/* Protect xenbus reader thread against save/restore. */
+DEFINE_MUTEX(xs_response_mutex);
+
+static int xenbus_irq;
+static struct task_struct *xenbus_task;
+
+static irqreturn_t wake_waiting(int irq, void *unused)
+{
+	wake_up(&xb_waitq);
+	return IRQ_HANDLED;
+}
+
+static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
+{
+	return ((prod - cons) <= XENSTORE_RING_SIZE);
+}
+
+static void *get_output_chunk(XENSTORE_RING_IDX cons,
+			      XENSTORE_RING_IDX prod,
+			      char *buf, uint32_t *len)
+{
+	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
+	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
+		*len = XENSTORE_RING_SIZE - (prod - cons);
+	return buf + MASK_XENSTORE_IDX(prod);
+}
+
+static const void *get_input_chunk(XENSTORE_RING_IDX cons,
+				   XENSTORE_RING_IDX prod,
+				   const char *buf, uint32_t *len)
+{
+	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
+	if ((prod - cons) < *len)
+		*len = prod - cons;
+	return buf + MASK_XENSTORE_IDX(cons);
+}
+
+static int xb_data_to_write(void)
+{
+	struct xenstore_domain_interface *intf = xen_store_interface;
+
+	return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
+		!list_empty(&xb_write_list);
+}
+
+/**
+ * xb_write - low level write
+ * @data: buffer to send
+ * @len: length of buffer
+ *
+ * Returns number of bytes written or -err.
+ */
+static int xb_write(const void *data, unsigned int len)
+{
+	struct xenstore_domain_interface *intf = xen_store_interface;
+	XENSTORE_RING_IDX cons, prod;
+	unsigned int bytes = 0;
+
+	while (len != 0) {
+		void *dst;
+		unsigned int avail;
+
+		/* Read indexes, then verify. */
+		cons = intf->req_cons;
+		prod = intf->req_prod;
+		if (!check_indexes(cons, prod)) {
+			intf->req_cons = intf->req_prod = 0;
+			return -EIO;
+		}
+		if (!xb_data_to_write())
+			return bytes;
+
+		/* Must write data /after/ reading the consumer index. */
+		virt_mb();
+
+		dst = get_output_chunk(cons, prod, intf->req, &avail);
+		if (avail == 0)
+			continue;
+		if (avail > len)
+			avail = len;
+
+		memcpy(dst, data, avail);
+		data += avail;
+		len -= avail;
+		bytes += avail;
+
+		/* Other side must not see new producer until data is there. */
+		virt_wmb();
+		intf->req_prod += avail;
+
+		/* Implies mb(): other side will see the updated producer. */
+		if (prod <= intf->req_cons)
+			notify_remote_via_evtchn(xen_store_evtchn);
+	}
+
+	return bytes;
+}
+
+static int xb_data_to_read(void)
+{
+	struct xenstore_domain_interface *intf = xen_store_interface;
+	return (intf->rsp_cons != intf->rsp_prod);
+}
+
+static int xb_read(void *data, unsigned int len)
+{
+	struct xenstore_domain_interface *intf = xen_store_interface;
+	XENSTORE_RING_IDX cons, prod;
+	unsigned int bytes = 0;
+
+	while (len != 0) {
+		unsigned int avail;
+		const char *src;
+
+		/* Read indexes, then verify. */
+		cons = intf->rsp_cons;
+		prod = intf->rsp_prod;
+		if (cons == prod)
+			return bytes;
+
+		if (!check_indexes(cons, prod)) {
+			intf->rsp_cons = intf->rsp_prod = 0;
+			return -EIO;
+		}
+
+		src = get_input_chunk(cons, prod, intf->rsp, &avail);
+		if (avail == 0)
+			continue;
+		if (avail > len)
+			avail = len;
+
+		/* Must read data /after/ reading the producer index. */
+		virt_rmb();
+
+		memcpy(data, src, avail);
+		data += avail;
+		len -= avail;
+		bytes += avail;
+
+		/* Other side must not see free space until we've copied out */
+		virt_mb();
+		intf->rsp_cons += avail;
+
+		/* Implies mb(): other side will see the updated consumer. */
+		if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
+			notify_remote_via_evtchn(xen_store_evtchn);
+	}
+
+	return bytes;
+}
+
+static int process_msg(void)
+{
+	static struct {
+		struct xsd_sockmsg msg;
+		char *body;
+		union {
+			void *alloc;
+			struct xs_watch_event *watch;
+		};
+		bool in_msg;
+		bool in_hdr;
+		unsigned int read;
+	} state;
+	struct xb_req_data *req;
+	int err;
+	unsigned int len;
+
+	if (!state.in_msg) {
+		state.in_msg = true;
+		state.in_hdr = true;
+		state.read = 0;
+
+		/*
+		 * We must disallow save/restore while reading a message.
+		 * A partial read across s/r leaves us out of sync with
+		 * xenstored.
+		 * xs_response_mutex is locked as long as we are processing one
+		 * message. state.in_msg will be true as long as we are holding
+		 * the lock here.
+		 */
+		mutex_lock(&xs_response_mutex);
+
+		if (!xb_data_to_read()) {
+			/* We raced with save/restore: pending data 'gone'. */
+			mutex_unlock(&xs_response_mutex);
+			state.in_msg = false;
+			return 0;
+		}
+	}
+
+	if (state.in_hdr) {
+		if (state.read != sizeof(state.msg)) {
+			err = xb_read((void *)&state.msg + state.read,
+				      sizeof(state.msg) - state.read);
+			if (err < 0)
+				goto out;
+			state.read += err;
+			if (state.read != sizeof(state.msg))
+				return 0;
+			if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+		len = state.msg.len + 1;
+		if (state.msg.type == XS_WATCH_EVENT)
+			len += sizeof(*state.watch);
+
+		state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
+		if (!state.alloc)
+			return -ENOMEM;
+
+		if (state.msg.type == XS_WATCH_EVENT)
+			state.body = state.watch->body;
+		else
+			state.body = state.alloc;
+		state.in_hdr = false;
+		state.read = 0;
+	}
+
+	err = xb_read(state.body + state.read, state.msg.len - state.read);
+	if (err < 0)
+		goto out;
+
+	state.read += err;
+	if (state.read != state.msg.len)
+		return 0;
+
+	state.body[state.msg.len] = '\0';
+
+	if (state.msg.type == XS_WATCH_EVENT) {
+		state.watch->len = state.msg.len;
+		err = xs_watch_msg(state.watch);
+	} else {
+		err = -ENOENT;
+		mutex_lock(&xb_write_mutex);
+		list_for_each_entry(req, &xs_reply_list, list) {
+			if (req->msg.req_id == state.msg.req_id) {
+				list_del(&req->list);
+				err = 0;
+				break;
+			}
+		}
+		mutex_unlock(&xb_write_mutex);
+		if (err)
+			goto out;
+
+		if (req->state == xb_req_state_wait_reply) {
+			req->msg.req_id = req->caller_req_id;
+			req->msg.type = state.msg.type;
+			req->msg.len = state.msg.len;
+			req->body = state.body;
+			/* write body, then update state */
+			virt_wmb();
+			req->state = xb_req_state_got_reply;
+			req->cb(req);
+		} else
+			kfree(req);
+	}
+
+	mutex_unlock(&xs_response_mutex);
+
+	state.in_msg = false;
+	state.alloc = NULL;
+	return err;
+
+ out:
+	mutex_unlock(&xs_response_mutex);
+	state.in_msg = false;
+	kfree(state.alloc);
+	state.alloc = NULL;
+	return err;
+}
+
+static int process_writes(void)
+{
+	static struct {
+		struct xb_req_data *req;
+		int idx;
+		unsigned int written;
+	} state;
+	void *base;
+	unsigned int len;
+	int err = 0;
+
+	if (!xb_data_to_write())
+		return 0;
+
+	mutex_lock(&xb_write_mutex);
+
+	if (!state.req) {
+		state.req = list_first_entry(&xb_write_list,
+					     struct xb_req_data, list);
+		state.idx = -1;
+		state.written = 0;
+	}
+
+	if (state.req->state == xb_req_state_aborted)
+		goto out_err;
+
+	while (state.idx < state.req->num_vecs) {
+		if (state.idx < 0) {
+			base = &state.req->msg;
+			len = sizeof(state.req->msg);
+		} else {
+			base = state.req->vec[state.idx].iov_base;
+			len = state.req->vec[state.idx].iov_len;
+		}
+		err = xb_write(base + state.written, len - state.written);
+		if (err < 0)
+			goto out_err;
+		state.written += err;
+		if (state.written != len)
+			goto out;
+
+		state.idx++;
+		state.written = 0;
+	}
+
+	list_del(&state.req->list);
+	state.req->state = xb_req_state_wait_reply;
+	list_add_tail(&state.req->list, &xs_reply_list);
+	state.req = NULL;
+
+ out:
+	mutex_unlock(&xb_write_mutex);
+
+	return 0;
+
+ out_err:
+	state.req->msg.type = XS_ERROR;
+	state.req->err = err;
+	list_del(&state.req->list);
+	if (state.req->state == xb_req_state_aborted)
+		kfree(state.req);
+	else {
+		/* write err, then update state */
+		virt_wmb();
+		state.req->state = xb_req_state_got_reply;
+		wake_up(&state.req->wq);
+	}
+
+	mutex_unlock(&xb_write_mutex);
+
+	state.req = NULL;
+
+	return err;
+}
+
+static int xb_thread_work(void)
+{
+	return xb_data_to_read() || xb_data_to_write();
+}
+
+static int xenbus_thread(void *unused)
+{
+	int err;
+
+	while (!kthread_should_stop()) {
+		if (wait_event_interruptible(xb_waitq, xb_thread_work()))
+			continue;
+
+		err = process_msg();
+		if (err == -ENOMEM)
+			schedule();
+		else if (err)
+			pr_warn_ratelimited("error %d while reading message\n",
+					    err);
+
+		err = process_writes();
+		if (err)
+			pr_warn_ratelimited("error %d while writing message\n",
+					    err);
+	}
+
+	xenbus_task = NULL;
+	return 0;
+}
+
+/**
+ * xb_init_comms - Set up interrupt handler off store event channel.
+ */
+int xb_init_comms(void)
+{
+	struct xenstore_domain_interface *intf = xen_store_interface;
+
+	if (intf->req_prod != intf->req_cons)
+		pr_err("request ring is not quiescent (%08x:%08x)!\n",
+		       intf->req_cons, intf->req_prod);
+
+	if (intf->rsp_prod != intf->rsp_cons) {
+		pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
+			intf->rsp_cons, intf->rsp_prod);
+		/* breaks kdump */
+		if (!reset_devices)
+			intf->rsp_cons = intf->rsp_prod;
+	}
+
+	if (xenbus_irq) {
+		/* Already have an irq; assume we're resuming */
+		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
+	} else {
+		int err;
+
+		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
+						0, "xenbus", &xb_waitq);
+		if (err < 0) {
+			pr_err("request irq failed %i\n", err);
+			return err;
+		}
+
+		xenbus_irq = err;
+
+		if (!xenbus_task) {
+			xenbus_task = kthread_run(xenbus_thread, NULL,
+						  "xenbus");
+			if (IS_ERR(xenbus_task))
+				return PTR_ERR(xenbus_task);
+		}
+	}
+
+	return 0;
+}
+
+void xb_deinit_comms(void)
+{
+	unbind_from_irqhandler(xenbus_irq, &xb_waitq);
+	xenbus_irq = 0;
+}
diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c
new file mode 100644
index 000000000..edba5fecd
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_dev_backend.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/init.h>
+#include <linux/capability.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/xenbus.h>
+#include <xen/xenbus_dev.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+#include <asm/xen/hypervisor.h>
+
+#include "xenbus.h"
+
+static int xenbus_backend_open(struct inode *inode, struct file *filp)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return nonseekable_open(inode, filp);
+}
+
+static long xenbus_alloc(domid_t domid)
+{
+	struct evtchn_alloc_unbound arg;
+	int err = -EEXIST;
+
+	xs_suspend();
+
+	/* If xenstored_ready is nonzero, that means we have already talked to
+	 * xenstore and set up watches. These watches will be restored by
+	 * xs_resume, but that requires communication over the port established
+	 * below that is not visible to anyone until the ioctl returns.
+	 *
+	 * This can be resolved by splitting the ioctl into two parts
+	 * (postponing the resume until xenstored is active) but this is
+	 * unnecessarily complex for the intended use where xenstored is only
+	 * started once - so return -EEXIST if it's already running.
+	 */
+	if (xenstored_ready)
+		goto out_err;
+
+	gnttab_grant_foreign_access_ref(GNTTAB_RESERVED_XENSTORE, domid,
+			virt_to_gfn(xen_store_interface), 0 /* writable */);
+
+	arg.dom = DOMID_SELF;
+	arg.remote_dom = domid;
+
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &arg);
+	if (err)
+		goto out_err;
+
+	if (xen_store_evtchn > 0)
+		xb_deinit_comms();
+
+	xen_store_evtchn = arg.port;
+
+	xs_resume();
+
+	return arg.port;
+
+ out_err:
+	xs_suspend_cancel();
+	return err;
+}
+
+static long xenbus_backend_ioctl(struct file *file, unsigned int cmd,
+				 unsigned long data)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case IOCTL_XENBUS_BACKEND_EVTCHN:
+		if (xen_store_evtchn > 0)
+			return xen_store_evtchn;
+		return -ENODEV;
+	case IOCTL_XENBUS_BACKEND_SETUP:
+		return xenbus_alloc(data);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	size_t size = vma->vm_end - vma->vm_start;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
+		return -EINVAL;
+
+	if (remap_pfn_range(vma, vma->vm_start,
+			    virt_to_pfn(xen_store_interface),
+			    size, vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+static const struct file_operations xenbus_backend_fops = {
+	.open = xenbus_backend_open,
+	.mmap = xenbus_backend_mmap,
+	.unlocked_ioctl = xenbus_backend_ioctl,
+};
+
+static struct miscdevice xenbus_backend_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "xen/xenbus_backend",
+	.fops = &xenbus_backend_fops,
+};
+
+static int __init xenbus_backend_init(void)
+{
+	int err;
+
+	if (!xen_initial_domain())
+		return -ENODEV;
+
+	err = misc_register(&xenbus_backend_dev);
+	if (err)
+		pr_err("Could not register xenbus backend device\n");
+	return err;
+}
+device_initcall(xenbus_backend_init);
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
new file mode 100644
index 000000000..0792fda49
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -0,0 +1,725 @@
+/*
+ * Driver giving user-space access to the kernel's xenbus connection
+ * to xenstore.
+ *
+ * Copyright (c) 2005, Christian Limpach
+ * Copyright (c) 2005, Rusty Russell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Changes:
+ * 2008-10-07  Alex Zeffertt    Replaced /proc/xen/xenbus with xenfs filesystem
+ *                              and /proc/xen compatibility mount point.
+ *                              Turned xenfs into a loadable module.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/uio.h>
+#include <linux/notifier.h>
+#include <linux/wait.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/uaccess.h>
+#include <linux/init.h>
+#include <linux/namei.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/miscdevice.h>
+#include <linux/workqueue.h>
+
+#include <xen/xenbus.h>
+#include <xen/xen.h>
+#include <asm/xen/hypervisor.h>
+
+#include "xenbus.h"
+
+unsigned int xb_dev_generation_id;
+
+/*
+ * An element of a list of outstanding transactions, for which we're
+ * still waiting a reply.
+ */
+struct xenbus_transaction_holder {
+	struct list_head list;
+	struct xenbus_transaction handle;
+	unsigned int generation_id;
+};
+
+/*
+ * A buffer of data on the queue.
+ */
+struct read_buffer {
+	struct list_head list;
+	unsigned int cons;
+	unsigned int len;
+	char msg[];
+};
+
+struct xenbus_file_priv {
+	/*
+	 * msgbuffer_mutex is held while partial requests are built up
+	 * and complete requests are acted on.  It therefore protects
+	 * the "transactions" and "watches" lists, and the partial
+	 * request length and buffer.
+	 *
+	 * reply_mutex protects the reply being built up to return to
+	 * usermode.  It nests inside msgbuffer_mutex but may be held
+	 * alone during a watch callback.
+	 */
+	struct mutex msgbuffer_mutex;
+
+	/* In-progress transactions */
+	struct list_head transactions;
+
+	/* Active watches. */
+	struct list_head watches;
+
+	/* Partial request. */
+	unsigned int len;
+	union {
+		struct xsd_sockmsg msg;
+		char buffer[XENSTORE_PAYLOAD_MAX];
+	} u;
+
+	/* Response queue. */
+	struct mutex reply_mutex;
+	struct list_head read_buffers;
+	wait_queue_head_t read_waitq;
+
+	struct kref kref;
+
+	struct work_struct wq;
+};
+
+/* Read out any raw xenbus messages queued up. */
+static ssize_t xenbus_file_read(struct file *filp,
+			       char __user *ubuf,
+			       size_t len, loff_t *ppos)
+{
+	struct xenbus_file_priv *u = filp->private_data;
+	struct read_buffer *rb;
+	ssize_t i;
+	int ret;
+
+	mutex_lock(&u->reply_mutex);
+again:
+	while (list_empty(&u->read_buffers)) {
+		mutex_unlock(&u->reply_mutex);
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		ret = wait_event_interruptible(u->read_waitq,
+					       !list_empty(&u->read_buffers));
+		if (ret)
+			return ret;
+		mutex_lock(&u->reply_mutex);
+	}
+
+	rb = list_entry(u->read_buffers.next, struct read_buffer, list);
+	i = 0;
+	while (i < len) {
+		size_t sz = min_t(size_t, len - i, rb->len - rb->cons);
+
+		ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz);
+
+		i += sz - ret;
+		rb->cons += sz - ret;
+
+		if (ret != 0) {
+			if (i == 0)
+				i = -EFAULT;
+			goto out;
+		}
+
+		/* Clear out buffer if it has been consumed */
+		if (rb->cons == rb->len) {
+			list_del(&rb->list);
+			kfree(rb);
+			if (list_empty(&u->read_buffers))
+				break;
+			rb = list_entry(u->read_buffers.next,
+					struct read_buffer, list);
+		}
+	}
+	if (i == 0)
+		goto again;
+
+out:
+	mutex_unlock(&u->reply_mutex);
+	return i;
+}
+
+/*
+ * Add a buffer to the queue.  Caller must hold the appropriate lock
+ * if the queue is not local.  (Commonly the caller will build up
+ * multiple queued buffers on a temporary local list, and then add it
+ * to the appropriate list under lock once all the buffers have een
+ * successfully allocated.)
+ */
+static int queue_reply(struct list_head *queue, const void *data, size_t len)
+{
+	struct read_buffer *rb;
+
+	if (len == 0)
+		return 0;
+	if (len > XENSTORE_PAYLOAD_MAX)
+		return -EINVAL;
+
+	rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
+	if (rb == NULL)
+		return -ENOMEM;
+
+	rb->cons = 0;
+	rb->len = len;
+
+	memcpy(rb->msg, data, len);
+
+	list_add_tail(&rb->list, queue);
+	return 0;
+}
+
+/*
+ * Free all the read_buffer s on a list.
+ * Caller must have sole reference to list.
+ */
+static void queue_cleanup(struct list_head *list)
+{
+	struct read_buffer *rb;
+
+	while (!list_empty(list)) {
+		rb = list_entry(list->next, struct read_buffer, list);
+		list_del(list->next);
+		kfree(rb);
+	}
+}
+
+struct watch_adapter {
+	struct list_head list;
+	struct xenbus_watch watch;
+	struct xenbus_file_priv *dev_data;
+	char *token;
+};
+
+static void free_watch_adapter(struct watch_adapter *watch)
+{
+	kfree(watch->watch.node);
+	kfree(watch->token);
+	kfree(watch);
+}
+
+static struct watch_adapter *alloc_watch_adapter(const char *path,
+						 const char *token)
+{
+	struct watch_adapter *watch;
+
+	watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+	if (watch == NULL)
+		goto out_fail;
+
+	watch->watch.node = kstrdup(path, GFP_KERNEL);
+	if (watch->watch.node == NULL)
+		goto out_free;
+
+	watch->token = kstrdup(token, GFP_KERNEL);
+	if (watch->token == NULL)
+		goto out_free;
+
+	return watch;
+
+out_free:
+	free_watch_adapter(watch);
+
+out_fail:
+	return NULL;
+}
+
+static void watch_fired(struct xenbus_watch *watch,
+			const char *path,
+			const char *token)
+{
+	struct watch_adapter *adap;
+	struct xsd_sockmsg hdr;
+	const char *token_caller;
+	int path_len, tok_len, body_len;
+	int ret;
+	LIST_HEAD(staging_q);
+
+	adap = container_of(watch, struct watch_adapter, watch);
+
+	token_caller = adap->token;
+
+	path_len = strlen(path) + 1;
+	tok_len = strlen(token_caller) + 1;
+	body_len = path_len + tok_len;
+
+	hdr.type = XS_WATCH_EVENT;
+	hdr.len = body_len;
+
+	mutex_lock(&adap->dev_data->reply_mutex);
+
+	ret = queue_reply(&staging_q, &hdr, sizeof(hdr));
+	if (!ret)
+		ret = queue_reply(&staging_q, path, path_len);
+	if (!ret)
+		ret = queue_reply(&staging_q, token_caller, tok_len);
+
+	if (!ret) {
+		/* success: pass reply list onto watcher */
+		list_splice_tail(&staging_q, &adap->dev_data->read_buffers);
+		wake_up(&adap->dev_data->read_waitq);
+	} else
+		queue_cleanup(&staging_q);
+
+	mutex_unlock(&adap->dev_data->reply_mutex);
+}
+
+static void xenbus_worker(struct work_struct *wq)
+{
+	struct xenbus_file_priv *u;
+	struct xenbus_transaction_holder *trans, *tmp;
+	struct watch_adapter *watch, *tmp_watch;
+	struct read_buffer *rb, *tmp_rb;
+
+	u = container_of(wq, struct xenbus_file_priv, wq);
+
+	/*
+	 * No need for locking here because there are no other users,
+	 * by definition.
+	 */
+
+	list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
+		xenbus_transaction_end(trans->handle, 1);
+		list_del(&trans->list);
+		kfree(trans);
+	}
+
+	list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
+		unregister_xenbus_watch(&watch->watch);
+		list_del(&watch->list);
+		free_watch_adapter(watch);
+	}
+
+	list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) {
+		list_del(&rb->list);
+		kfree(rb);
+	}
+	kfree(u);
+}
+
+static void xenbus_file_free(struct kref *kref)
+{
+	struct xenbus_file_priv *u;
+
+	/*
+	 * We might be called in xenbus_thread().
+	 * Use workqueue to avoid deadlock.
+	 */
+	u = container_of(kref, struct xenbus_file_priv, kref);
+	schedule_work(&u->wq);
+}
+
+static struct xenbus_transaction_holder *xenbus_get_transaction(
+	struct xenbus_file_priv *u, uint32_t tx_id)
+{
+	struct xenbus_transaction_holder *trans;
+
+	list_for_each_entry(trans, &u->transactions, list)
+		if (trans->handle.id == tx_id)
+			return trans;
+
+	return NULL;
+}
+
+void xenbus_dev_queue_reply(struct xb_req_data *req)
+{
+	struct xenbus_file_priv *u = req->par;
+	struct xenbus_transaction_holder *trans = NULL;
+	int rc;
+	LIST_HEAD(staging_q);
+
+	xs_request_exit(req);
+
+	mutex_lock(&u->msgbuffer_mutex);
+
+	if (req->type == XS_TRANSACTION_START) {
+		trans = xenbus_get_transaction(u, 0);
+		if (WARN_ON(!trans))
+			goto out;
+		if (req->msg.type == XS_ERROR) {
+			list_del(&trans->list);
+			kfree(trans);
+		} else {
+			rc = kstrtou32(req->body, 10, &trans->handle.id);
+			if (WARN_ON(rc))
+				goto out;
+		}
+	} else if (req->type == XS_TRANSACTION_END) {
+		trans = xenbus_get_transaction(u, req->msg.tx_id);
+		if (WARN_ON(!trans))
+			goto out;
+		list_del(&trans->list);
+		kfree(trans);
+	}
+
+	mutex_unlock(&u->msgbuffer_mutex);
+
+	mutex_lock(&u->reply_mutex);
+	rc = queue_reply(&staging_q, &req->msg, sizeof(req->msg));
+	if (!rc)
+		rc = queue_reply(&staging_q, req->body, req->msg.len);
+	if (!rc) {
+		list_splice_tail(&staging_q, &u->read_buffers);
+		wake_up(&u->read_waitq);
+	} else {
+		queue_cleanup(&staging_q);
+	}
+	mutex_unlock(&u->reply_mutex);
+
+	kfree(req->body);
+	kfree(req);
+
+	kref_put(&u->kref, xenbus_file_free);
+
+	return;
+
+ out:
+	mutex_unlock(&u->msgbuffer_mutex);
+}
+
+static int xenbus_command_reply(struct xenbus_file_priv *u,
+				unsigned int msg_type, const char *reply)
+{
+	struct {
+		struct xsd_sockmsg hdr;
+		char body[16];
+	} msg;
+	int rc;
+
+	msg.hdr = u->u.msg;
+	msg.hdr.type = msg_type;
+	msg.hdr.len = strlen(reply) + 1;
+	if (msg.hdr.len > sizeof(msg.body))
+		return -E2BIG;
+	memcpy(&msg.body, reply, msg.hdr.len);
+
+	mutex_lock(&u->reply_mutex);
+	rc = queue_reply(&u->read_buffers, &msg, sizeof(msg.hdr) + msg.hdr.len);
+	wake_up(&u->read_waitq);
+	mutex_unlock(&u->reply_mutex);
+
+	if (!rc)
+		kref_put(&u->kref, xenbus_file_free);
+
+	return rc;
+}
+
+static int xenbus_write_transaction(unsigned msg_type,
+				    struct xenbus_file_priv *u)
+{
+	int rc;
+	struct xenbus_transaction_holder *trans = NULL;
+	struct {
+		struct xsd_sockmsg hdr;
+		char body[];
+	} *msg = (void *)u->u.buffer;
+
+	if (msg_type == XS_TRANSACTION_START) {
+		trans = kzalloc(sizeof(*trans), GFP_KERNEL);
+		if (!trans) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		trans->generation_id = xb_dev_generation_id;
+		list_add(&trans->list, &u->transactions);
+	} else if (msg->hdr.tx_id != 0 &&
+		   !xenbus_get_transaction(u, msg->hdr.tx_id))
+		return xenbus_command_reply(u, XS_ERROR, "ENOENT");
+	else if (msg_type == XS_TRANSACTION_END &&
+		 !(msg->hdr.len == 2 &&
+		   (!strcmp(msg->body, "T") || !strcmp(msg->body, "F"))))
+		return xenbus_command_reply(u, XS_ERROR, "EINVAL");
+	else if (msg_type == XS_TRANSACTION_END) {
+		trans = xenbus_get_transaction(u, msg->hdr.tx_id);
+		if (trans && trans->generation_id != xb_dev_generation_id) {
+			list_del(&trans->list);
+			kfree(trans);
+			if (!strcmp(msg->body, "T"))
+				return xenbus_command_reply(u, XS_ERROR,
+							    "EAGAIN");
+			else
+				return xenbus_command_reply(u,
+							    XS_TRANSACTION_END,
+							    "OK");
+		}
+	}
+
+	rc = xenbus_dev_request_and_reply(&msg->hdr, u);
+	if (rc && trans) {
+		list_del(&trans->list);
+		kfree(trans);
+	}
+
+out:
+	return rc;
+}
+
+static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
+{
+	struct watch_adapter *watch;
+	char *path, *token;
+	int err, rc;
+
+	path = u->u.buffer + sizeof(u->u.msg);
+	token = memchr(path, 0, u->u.msg.len);
+	if (token == NULL) {
+		rc = xenbus_command_reply(u, XS_ERROR, "EINVAL");
+		goto out;
+	}
+	token++;
+	if (memchr(token, 0, u->u.msg.len - (token - path)) == NULL) {
+		rc = xenbus_command_reply(u, XS_ERROR, "EINVAL");
+		goto out;
+	}
+
+	if (msg_type == XS_WATCH) {
+		watch = alloc_watch_adapter(path, token);
+		if (watch == NULL) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		watch->watch.callback = watch_fired;
+		watch->dev_data = u;
+
+		err = register_xenbus_watch(&watch->watch);
+		if (err) {
+			free_watch_adapter(watch);
+			rc = err;
+			goto out;
+		}
+		list_add(&watch->list, &u->watches);
+	} else {
+		list_for_each_entry(watch, &u->watches, list) {
+			if (!strcmp(watch->token, token) &&
+			    !strcmp(watch->watch.node, path)) {
+				unregister_xenbus_watch(&watch->watch);
+				list_del(&watch->list);
+				free_watch_adapter(watch);
+				break;
+			}
+		}
+	}
+
+	/* Success.  Synthesize a reply to say all is OK. */
+	rc = xenbus_command_reply(u, msg_type, "OK");
+
+out:
+	return rc;
+}
+
+static ssize_t xenbus_file_write(struct file *filp,
+				const char __user *ubuf,
+				size_t len, loff_t *ppos)
+{
+	struct xenbus_file_priv *u = filp->private_data;
+	uint32_t msg_type;
+	int rc = len;
+	int ret;
+
+	/*
+	 * We're expecting usermode to be writing properly formed
+	 * xenbus messages.  If they write an incomplete message we
+	 * buffer it up.  Once it is complete, we act on it.
+	 */
+
+	/*
+	 * Make sure concurrent writers can't stomp all over each
+	 * other's messages and make a mess of our partial message
+	 * buffer.  We don't make any attemppt to stop multiple
+	 * writers from making a mess of each other's incomplete
+	 * messages; we're just trying to guarantee our own internal
+	 * consistency and make sure that single writes are handled
+	 * atomically.
+	 */
+	mutex_lock(&u->msgbuffer_mutex);
+
+	/* Get this out of the way early to avoid confusion */
+	if (len == 0)
+		goto out;
+
+	/* Can't write a xenbus message larger we can buffer */
+	if (len > sizeof(u->u.buffer) - u->len) {
+		/* On error, dump existing buffer */
+		u->len = 0;
+		rc = -EINVAL;
+		goto out;
+	}
+
+	ret = copy_from_user(u->u.buffer + u->len, ubuf, len);
+
+	if (ret != 0) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	/* Deal with a partial copy. */
+	len -= ret;
+	rc = len;
+
+	u->len += len;
+
+	/* Return if we haven't got a full message yet */
+	if (u->len < sizeof(u->u.msg))
+		goto out;	/* not even the header yet */
+
+	/* If we're expecting a message that's larger than we can
+	   possibly send, dump what we have and return an error. */
+	if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) {
+		rc = -E2BIG;
+		u->len = 0;
+		goto out;
+	}
+
+	if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
+		goto out;	/* incomplete data portion */
+
+	/*
+	 * OK, now we have a complete message.  Do something with it.
+	 */
+
+	kref_get(&u->kref);
+
+	msg_type = u->u.msg.type;
+
+	switch (msg_type) {
+	case XS_WATCH:
+	case XS_UNWATCH:
+		/* (Un)Ask for some path to be watched for changes */
+		ret = xenbus_write_watch(msg_type, u);
+		break;
+
+	default:
+		/* Send out a transaction */
+		ret = xenbus_write_transaction(msg_type, u);
+		break;
+	}
+	if (ret != 0) {
+		rc = ret;
+		kref_put(&u->kref, xenbus_file_free);
+	}
+
+	/* Buffered message consumed */
+	u->len = 0;
+
+ out:
+	mutex_unlock(&u->msgbuffer_mutex);
+	return rc;
+}
+
+static int xenbus_file_open(struct inode *inode, struct file *filp)
+{
+	struct xenbus_file_priv *u;
+
+	if (xen_store_evtchn == 0)
+		return -ENOENT;
+
+	stream_open(inode, filp);
+
+	u = kzalloc(sizeof(*u), GFP_KERNEL);
+	if (u == NULL)
+		return -ENOMEM;
+
+	kref_init(&u->kref);
+
+	INIT_LIST_HEAD(&u->transactions);
+	INIT_LIST_HEAD(&u->watches);
+	INIT_LIST_HEAD(&u->read_buffers);
+	init_waitqueue_head(&u->read_waitq);
+	INIT_WORK(&u->wq, xenbus_worker);
+
+	mutex_init(&u->reply_mutex);
+	mutex_init(&u->msgbuffer_mutex);
+
+	filp->private_data = u;
+
+	return 0;
+}
+
+static int xenbus_file_release(struct inode *inode, struct file *filp)
+{
+	struct xenbus_file_priv *u = filp->private_data;
+
+	kref_put(&u->kref, xenbus_file_free);
+
+	return 0;
+}
+
+static __poll_t xenbus_file_poll(struct file *file, poll_table *wait)
+{
+	struct xenbus_file_priv *u = file->private_data;
+
+	poll_wait(file, &u->read_waitq, wait);
+	if (!list_empty(&u->read_buffers))
+		return EPOLLIN | EPOLLRDNORM;
+	return 0;
+}
+
+const struct file_operations xen_xenbus_fops = {
+	.read = xenbus_file_read,
+	.write = xenbus_file_write,
+	.open = xenbus_file_open,
+	.release = xenbus_file_release,
+	.poll = xenbus_file_poll,
+	.llseek = no_llseek,
+};
+EXPORT_SYMBOL_GPL(xen_xenbus_fops);
+
+static struct miscdevice xenbus_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "xen/xenbus",
+	.fops = &xen_xenbus_fops,
+};
+
+static int __init xenbus_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	err = misc_register(&xenbus_dev);
+	if (err)
+		pr_err("Could not register xenbus frontend device\n");
+	return err;
+}
+device_initcall(xenbus_init);
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
new file mode 100644
index 000000000..3205e5d72
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -0,0 +1,1072 @@
+/******************************************************************************
+ * Talks to Xen Store to figure out what devices we have.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
+ * Copyright (C) 2005, 2006 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
+
+#define DPRINTK(fmt, args...)				\
+	pr_debug("xenbus_probe (%s:%d) " fmt ".\n",	\
+		 __func__, __LINE__, ##args)
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <asm/page.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/xen-ops.h>
+#include <xen/page.h>
+
+#include <xen/hvm.h>
+
+#include "xenbus.h"
+
+
+static int xs_init_irq;
+int xen_store_evtchn;
+EXPORT_SYMBOL_GPL(xen_store_evtchn);
+
+struct xenstore_domain_interface *xen_store_interface;
+EXPORT_SYMBOL_GPL(xen_store_interface);
+
+enum xenstore_init xen_store_domain_type;
+EXPORT_SYMBOL_GPL(xen_store_domain_type);
+
+static unsigned long xen_store_gfn;
+
+static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
+
+/* If something in array of ids matches this device, return it. */
+static const struct xenbus_device_id *
+match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
+{
+	for (; *arr->devicetype != '\0'; arr++) {
+		if (!strcmp(arr->devicetype, dev->devicetype))
+			return arr;
+	}
+	return NULL;
+}
+
+int xenbus_match(struct device *_dev, struct device_driver *_drv)
+{
+	struct xenbus_driver *drv = to_xenbus_driver(_drv);
+
+	if (!drv->ids)
+		return 0;
+
+	return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
+}
+EXPORT_SYMBOL_GPL(xenbus_match);
+
+
+static void free_otherend_details(struct xenbus_device *dev)
+{
+	kfree(dev->otherend);
+	dev->otherend = NULL;
+}
+
+
+static void free_otherend_watch(struct xenbus_device *dev)
+{
+	if (dev->otherend_watch.node) {
+		unregister_xenbus_watch(&dev->otherend_watch);
+		kfree(dev->otherend_watch.node);
+		dev->otherend_watch.node = NULL;
+	}
+}
+
+
+static int talk_to_otherend(struct xenbus_device *dev)
+{
+	struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
+
+	free_otherend_watch(dev);
+	free_otherend_details(dev);
+
+	return drv->read_otherend_details(dev);
+}
+
+
+
+static int watch_otherend(struct xenbus_device *dev)
+{
+	struct xen_bus_type *bus =
+		container_of(dev->dev.bus, struct xen_bus_type, bus);
+
+	return xenbus_watch_pathfmt(dev, &dev->otherend_watch,
+				    bus->otherend_will_handle,
+				    bus->otherend_changed,
+				    "%s/%s", dev->otherend, "state");
+}
+
+
+int xenbus_read_otherend_details(struct xenbus_device *xendev,
+				 char *id_node, char *path_node)
+{
+	int err = xenbus_gather(XBT_NIL, xendev->nodename,
+				id_node, "%i", &xendev->otherend_id,
+				path_node, NULL, &xendev->otherend,
+				NULL);
+	if (err) {
+		xenbus_dev_fatal(xendev, err,
+				 "reading other end details from %s",
+				 xendev->nodename);
+		return err;
+	}
+	if (strlen(xendev->otherend) == 0 ||
+	    !xenbus_exists(XBT_NIL, xendev->otherend, "")) {
+		xenbus_dev_fatal(xendev, -ENOENT,
+				 "unable to read other end from %s.  "
+				 "missing or inaccessible.",
+				 xendev->nodename);
+		free_otherend_details(xendev);
+		return -ENOENT;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_read_otherend_details);
+
+void xenbus_otherend_changed(struct xenbus_watch *watch,
+			     const char *path, const char *token,
+			     int ignore_on_shutdown)
+{
+	struct xenbus_device *dev =
+		container_of(watch, struct xenbus_device, otherend_watch);
+	struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
+	enum xenbus_state state;
+
+	/* Protect us against watches firing on old details when the otherend
+	   details change, say immediately after a resume. */
+	if (!dev->otherend ||
+	    strncmp(dev->otherend, path, strlen(dev->otherend))) {
+		dev_dbg(&dev->dev, "Ignoring watch at %s\n", path);
+		return;
+	}
+
+	state = xenbus_read_driver_state(dev->otherend);
+
+	dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n",
+		state, xenbus_strstate(state), dev->otherend_watch.node, path);
+
+	/*
+	 * Ignore xenbus transitions during shutdown. This prevents us doing
+	 * work that can fail e.g., when the rootfs is gone.
+	 */
+	if (system_state > SYSTEM_RUNNING) {
+		if (ignore_on_shutdown && (state == XenbusStateClosing))
+			xenbus_frontend_closed(dev);
+		return;
+	}
+
+	if (drv->otherend_changed)
+		drv->otherend_changed(dev, state);
+}
+EXPORT_SYMBOL_GPL(xenbus_otherend_changed);
+
+#define XENBUS_SHOW_STAT(name)						\
+static ssize_t name##_show(struct device *_dev,				\
+			   struct device_attribute *attr,		\
+			   char *buf)					\
+{									\
+	struct xenbus_device *dev = to_xenbus_device(_dev);		\
+									\
+	return sprintf(buf, "%d\n", atomic_read(&dev->name));		\
+}									\
+static DEVICE_ATTR_RO(name)
+
+XENBUS_SHOW_STAT(event_channels);
+XENBUS_SHOW_STAT(events);
+XENBUS_SHOW_STAT(spurious_events);
+XENBUS_SHOW_STAT(jiffies_eoi_delayed);
+
+static ssize_t spurious_threshold_show(struct device *_dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct xenbus_device *dev = to_xenbus_device(_dev);
+
+	return sprintf(buf, "%d\n", dev->spurious_threshold);
+}
+
+static ssize_t spurious_threshold_store(struct device *_dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct xenbus_device *dev = to_xenbus_device(_dev);
+	unsigned int val;
+	ssize_t ret;
+
+	ret = kstrtouint(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	dev->spurious_threshold = val;
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(spurious_threshold);
+
+static struct attribute *xenbus_attrs[] = {
+	&dev_attr_event_channels.attr,
+	&dev_attr_events.attr,
+	&dev_attr_spurious_events.attr,
+	&dev_attr_jiffies_eoi_delayed.attr,
+	&dev_attr_spurious_threshold.attr,
+	NULL
+};
+
+static const struct attribute_group xenbus_group = {
+	.name = "xenbus",
+	.attrs = xenbus_attrs,
+};
+
+int xenbus_dev_probe(struct device *_dev)
+{
+	struct xenbus_device *dev = to_xenbus_device(_dev);
+	struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
+	const struct xenbus_device_id *id;
+	int err;
+
+	DPRINTK("%s", dev->nodename);
+
+	if (!drv->probe) {
+		err = -ENODEV;
+		goto fail;
+	}
+
+	id = match_device(drv->ids, dev);
+	if (!id) {
+		err = -ENODEV;
+		goto fail;
+	}
+
+	err = talk_to_otherend(dev);
+	if (err) {
+		dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n",
+			 dev->nodename);
+		return err;
+	}
+
+	if (!try_module_get(drv->driver.owner)) {
+		dev_warn(&dev->dev, "failed to acquire module reference on '%s'\n",
+			 drv->driver.name);
+		err = -ESRCH;
+		goto fail;
+	}
+
+	down(&dev->reclaim_sem);
+	err = drv->probe(dev, id);
+	up(&dev->reclaim_sem);
+	if (err)
+		goto fail_put;
+
+	err = watch_otherend(dev);
+	if (err) {
+		dev_warn(&dev->dev, "watch_otherend on %s failed.\n",
+		       dev->nodename);
+		return err;
+	}
+
+	dev->spurious_threshold = 1;
+	if (sysfs_create_group(&dev->dev.kobj, &xenbus_group))
+		dev_warn(&dev->dev, "sysfs_create_group on %s failed.\n",
+			 dev->nodename);
+
+	return 0;
+fail_put:
+	module_put(drv->driver.owner);
+fail:
+	xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
+	return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_probe);
+
+void xenbus_dev_remove(struct device *_dev)
+{
+	struct xenbus_device *dev = to_xenbus_device(_dev);
+	struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
+
+	DPRINTK("%s", dev->nodename);
+
+	sysfs_remove_group(&dev->dev.kobj, &xenbus_group);
+
+	free_otherend_watch(dev);
+
+	if (drv->remove) {
+		down(&dev->reclaim_sem);
+		drv->remove(dev);
+		up(&dev->reclaim_sem);
+	}
+
+	module_put(drv->driver.owner);
+
+	free_otherend_details(dev);
+
+	/*
+	 * If the toolstack has forced the device state to closing then set
+	 * the state to closed now to allow it to be cleaned up.
+	 * Similarly, if the driver does not support re-bind, set the
+	 * closed.
+	 */
+	if (!drv->allow_rebind ||
+	    xenbus_read_driver_state(dev->nodename) == XenbusStateClosing)
+		xenbus_switch_state(dev, XenbusStateClosed);
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_remove);
+
+int xenbus_register_driver_common(struct xenbus_driver *drv,
+				  struct xen_bus_type *bus,
+				  struct module *owner, const char *mod_name)
+{
+	drv->driver.name = drv->name ? drv->name : drv->ids[0].devicetype;
+	drv->driver.bus = &bus->bus;
+	drv->driver.owner = owner;
+	drv->driver.mod_name = mod_name;
+
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(xenbus_register_driver_common);
+
+void xenbus_unregister_driver(struct xenbus_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(xenbus_unregister_driver);
+
+struct xb_find_info {
+	struct xenbus_device *dev;
+	const char *nodename;
+};
+
+static int cmp_dev(struct device *dev, void *data)
+{
+	struct xenbus_device *xendev = to_xenbus_device(dev);
+	struct xb_find_info *info = data;
+
+	if (!strcmp(xendev->nodename, info->nodename)) {
+		info->dev = xendev;
+		get_device(dev);
+		return 1;
+	}
+	return 0;
+}
+
+static struct xenbus_device *xenbus_device_find(const char *nodename,
+						struct bus_type *bus)
+{
+	struct xb_find_info info = { .dev = NULL, .nodename = nodename };
+
+	bus_for_each_dev(bus, NULL, &info, cmp_dev);
+	return info.dev;
+}
+
+static int cleanup_dev(struct device *dev, void *data)
+{
+	struct xenbus_device *xendev = to_xenbus_device(dev);
+	struct xb_find_info *info = data;
+	int len = strlen(info->nodename);
+
+	DPRINTK("%s", info->nodename);
+
+	/* Match the info->nodename path, or any subdirectory of that path. */
+	if (strncmp(xendev->nodename, info->nodename, len))
+		return 0;
+
+	/* If the node name is longer, ensure it really is a subdirectory. */
+	if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/'))
+		return 0;
+
+	info->dev = xendev;
+	get_device(dev);
+	return 1;
+}
+
+static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
+{
+	struct xb_find_info info = { .nodename = path };
+
+	do {
+		info.dev = NULL;
+		bus_for_each_dev(bus, NULL, &info, cleanup_dev);
+		if (info.dev) {
+			device_unregister(&info.dev->dev);
+			put_device(&info.dev->dev);
+		}
+	} while (info.dev);
+}
+
+static void xenbus_dev_release(struct device *dev)
+{
+	if (dev)
+		kfree(to_xenbus_device(dev));
+}
+
+static ssize_t nodename_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
+}
+static DEVICE_ATTR_RO(nodename);
+
+static ssize_t devtype_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
+}
+static DEVICE_ATTR_RO(devtype);
+
+static ssize_t modalias_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s:%s\n", dev->bus->name,
+		       to_xenbus_device(dev)->devicetype);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static ssize_t state_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n",
+			xenbus_strstate(to_xenbus_device(dev)->state));
+}
+static DEVICE_ATTR_RO(state);
+
+static struct attribute *xenbus_dev_attrs[] = {
+	&dev_attr_nodename.attr,
+	&dev_attr_devtype.attr,
+	&dev_attr_modalias.attr,
+	&dev_attr_state.attr,
+	NULL,
+};
+
+static const struct attribute_group xenbus_dev_group = {
+	.attrs = xenbus_dev_attrs,
+};
+
+const struct attribute_group *xenbus_dev_groups[] = {
+	&xenbus_dev_group,
+	NULL,
+};
+EXPORT_SYMBOL_GPL(xenbus_dev_groups);
+
+int xenbus_probe_node(struct xen_bus_type *bus,
+		      const char *type,
+		      const char *nodename)
+{
+	char devname[XEN_BUS_ID_SIZE];
+	int err;
+	struct xenbus_device *xendev;
+	size_t stringlen;
+	char *tmpstring;
+
+	enum xenbus_state state = xenbus_read_driver_state(nodename);
+
+	if (state != XenbusStateInitialising) {
+		/* Device is not new, so ignore it.  This can happen if a
+		   device is going away after switching to Closed.  */
+		return 0;
+	}
+
+	stringlen = strlen(nodename) + 1 + strlen(type) + 1;
+	xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL);
+	if (!xendev)
+		return -ENOMEM;
+
+	xendev->state = XenbusStateInitialising;
+
+	/* Copy the strings into the extra space. */
+
+	tmpstring = (char *)(xendev + 1);
+	strcpy(tmpstring, nodename);
+	xendev->nodename = tmpstring;
+
+	tmpstring += strlen(tmpstring) + 1;
+	strcpy(tmpstring, type);
+	xendev->devicetype = tmpstring;
+	init_completion(&xendev->down);
+
+	xendev->dev.bus = &bus->bus;
+	xendev->dev.release = xenbus_dev_release;
+
+	err = bus->get_bus_id(devname, xendev->nodename);
+	if (err)
+		goto fail;
+
+	dev_set_name(&xendev->dev, "%s", devname);
+	sema_init(&xendev->reclaim_sem, 1);
+
+	/* Register with generic device framework. */
+	err = device_register(&xendev->dev);
+	if (err) {
+		put_device(&xendev->dev);
+		xendev = NULL;
+		goto fail;
+	}
+
+	return 0;
+fail:
+	kfree(xendev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_probe_node);
+
+static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
+{
+	int err = 0;
+	char **dir;
+	unsigned int dir_n = 0;
+	int i;
+
+	dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	for (i = 0; i < dir_n; i++) {
+		err = bus->probe(bus, type, dir[i]);
+		if (err)
+			break;
+	}
+
+	kfree(dir);
+	return err;
+}
+
+int xenbus_probe_devices(struct xen_bus_type *bus)
+{
+	int err = 0;
+	char **dir;
+	unsigned int i, dir_n;
+
+	dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	for (i = 0; i < dir_n; i++) {
+		err = xenbus_probe_device_type(bus, dir[i]);
+		if (err)
+			break;
+	}
+
+	kfree(dir);
+	return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_probe_devices);
+
+static unsigned int char_count(const char *str, char c)
+{
+	unsigned int i, ret = 0;
+
+	for (i = 0; str[i]; i++)
+		if (str[i] == c)
+			ret++;
+	return ret;
+}
+
+static int strsep_len(const char *str, char c, unsigned int len)
+{
+	unsigned int i;
+
+	for (i = 0; str[i]; i++)
+		if (str[i] == c) {
+			if (len == 0)
+				return i;
+			len--;
+		}
+	return (len == 0) ? i : -ERANGE;
+}
+
+void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
+{
+	int exists, rootlen;
+	struct xenbus_device *dev;
+	char type[XEN_BUS_ID_SIZE];
+	const char *p, *root;
+
+	if (char_count(node, '/') < 2)
+		return;
+
+	exists = xenbus_exists(XBT_NIL, node, "");
+	if (!exists) {
+		xenbus_cleanup_devices(node, &bus->bus);
+		return;
+	}
+
+	/* backend/<type>/... or device/<type>/... */
+	p = strchr(node, '/') + 1;
+	snprintf(type, XEN_BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p);
+	type[XEN_BUS_ID_SIZE-1] = '\0';
+
+	rootlen = strsep_len(node, '/', bus->levels);
+	if (rootlen < 0)
+		return;
+	root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node);
+	if (!root)
+		return;
+
+	dev = xenbus_device_find(root, &bus->bus);
+	if (!dev)
+		xenbus_probe_node(bus, type, root);
+	else
+		put_device(&dev->dev);
+
+	kfree(root);
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_changed);
+
+int xenbus_dev_suspend(struct device *dev)
+{
+	int err = 0;
+	struct xenbus_driver *drv;
+	struct xenbus_device *xdev
+		= container_of(dev, struct xenbus_device, dev);
+
+	DPRINTK("%s", xdev->nodename);
+
+	if (dev->driver == NULL)
+		return 0;
+	drv = to_xenbus_driver(dev->driver);
+	if (drv->suspend)
+		err = drv->suspend(xdev);
+	if (err)
+		dev_warn(dev, "suspend failed: %i\n", err);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_suspend);
+
+int xenbus_dev_resume(struct device *dev)
+{
+	int err;
+	struct xenbus_driver *drv;
+	struct xenbus_device *xdev
+		= container_of(dev, struct xenbus_device, dev);
+
+	DPRINTK("%s", xdev->nodename);
+
+	if (dev->driver == NULL)
+		return 0;
+	drv = to_xenbus_driver(dev->driver);
+	err = talk_to_otherend(xdev);
+	if (err) {
+		dev_warn(dev, "resume (talk_to_otherend) failed: %i\n", err);
+		return err;
+	}
+
+	xdev->state = XenbusStateInitialising;
+
+	if (drv->resume) {
+		err = drv->resume(xdev);
+		if (err) {
+			dev_warn(dev, "resume failed: %i\n", err);
+			return err;
+		}
+	}
+
+	err = watch_otherend(xdev);
+	if (err) {
+		dev_warn(dev, "resume (watch_otherend) failed: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_resume);
+
+int xenbus_dev_cancel(struct device *dev)
+{
+	/* Do nothing */
+	DPRINTK("cancel");
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_cancel);
+
+/* A flag to determine if xenstored is 'ready' (i.e. has started) */
+int xenstored_ready;
+
+
+int register_xenstore_notifier(struct notifier_block *nb)
+{
+	int ret = 0;
+
+	if (xenstored_ready > 0)
+		ret = nb->notifier_call(nb, 0, NULL);
+	else
+		blocking_notifier_chain_register(&xenstore_chain, nb);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(register_xenstore_notifier);
+
+void unregister_xenstore_notifier(struct notifier_block *nb)
+{
+	blocking_notifier_chain_unregister(&xenstore_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
+
+static void xenbus_probe(void)
+{
+	xenstored_ready = 1;
+
+	if (!xen_store_interface) {
+		xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT,
+					       XEN_PAGE_SIZE, MEMREMAP_WB);
+		/*
+		 * Now it is safe to free the IRQ used for xenstore late
+		 * initialization. No need to unbind: it is about to be
+		 * bound again from xb_init_comms. Note that calling
+		 * unbind_from_irqhandler now would result in xen_evtchn_close()
+		 * being called and the event channel not being enabled again
+		 * afterwards, resulting in missed event notifications.
+		 */
+		free_irq(xs_init_irq, &xb_waitq);
+	}
+
+	/*
+	 * In the HVM case, xenbus_init() deferred its call to
+	 * xs_init() in case callbacks were not operational yet.
+	 * So do it now.
+	 */
+	if (xen_store_domain_type == XS_HVM)
+		xs_init();
+
+	/* Notify others that xenstore is up */
+	blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
+}
+
+/*
+ * Returns true when XenStore init must be deferred in order to
+ * allow the PCI platform device to be initialised, before we
+ * can actually have event channel interrupts working.
+ */
+static bool xs_hvm_defer_init_for_callback(void)
+{
+#ifdef CONFIG_XEN_PVHVM
+	return xen_store_domain_type == XS_HVM &&
+		!xen_have_vector_callback;
+#else
+	return false;
+#endif
+}
+
+static int xenbus_probe_thread(void *unused)
+{
+	DEFINE_WAIT(w);
+
+	/*
+	 * We actually just want to wait for *any* trigger of xb_waitq,
+	 * and run xenbus_probe() the moment it occurs.
+	 */
+	prepare_to_wait(&xb_waitq, &w, TASK_INTERRUPTIBLE);
+	schedule();
+	finish_wait(&xb_waitq, &w);
+
+	DPRINTK("probing");
+	xenbus_probe();
+	return 0;
+}
+
+static int __init xenbus_probe_initcall(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	/*
+	 * Probe XenBus here in the XS_PV case, and also XS_HVM unless we
+	 * need to wait for the platform PCI device to come up or
+	 * xen_store_interface is not ready.
+	 */
+	if (xen_store_domain_type == XS_PV ||
+	    (xen_store_domain_type == XS_HVM &&
+	     !xs_hvm_defer_init_for_callback() &&
+	     xen_store_interface != NULL))
+		xenbus_probe();
+
+	/*
+	 * For XS_LOCAL or when xen_store_interface is not ready, spawn a
+	 * thread which will wait for xenstored or a xenstore-stubdom to be
+	 * started, then probe.  It will be triggered when communication
+	 * starts happening, by waiting on xb_waitq.
+	 */
+	if (xen_store_domain_type == XS_LOCAL || xen_store_interface == NULL) {
+		struct task_struct *probe_task;
+
+		probe_task = kthread_run(xenbus_probe_thread, NULL,
+					 "xenbus_probe");
+		if (IS_ERR(probe_task))
+			return PTR_ERR(probe_task);
+	}
+	return 0;
+}
+device_initcall(xenbus_probe_initcall);
+
+int xen_set_callback_via(uint64_t via)
+{
+	struct xen_hvm_param a;
+	int ret;
+
+	a.domid = DOMID_SELF;
+	a.index = HVM_PARAM_CALLBACK_IRQ;
+	a.value = via;
+
+	ret = HYPERVISOR_hvm_op(HVMOP_set_param, &a);
+	if (ret)
+		return ret;
+
+	/*
+	 * If xenbus_probe_initcall() deferred the xenbus_probe()
+	 * due to the callback not functioning yet, we can do it now.
+	 */
+	if (!xenstored_ready && xs_hvm_defer_init_for_callback())
+		xenbus_probe();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xen_set_callback_via);
+
+/* Set up event channel for xenstored which is run as a local process
+ * (this is normally used only in dom0)
+ */
+static int __init xenstored_local_init(void)
+{
+	int err = -ENOMEM;
+	unsigned long page = 0;
+	struct evtchn_alloc_unbound alloc_unbound;
+
+	/* Allocate Xenstore page */
+	page = get_zeroed_page(GFP_KERNEL);
+	if (!page)
+		goto out_err;
+
+	xen_store_gfn = virt_to_gfn((void *)page);
+
+	/* Next allocate a local port which xenstored can bind to */
+	alloc_unbound.dom        = DOMID_SELF;
+	alloc_unbound.remote_dom = DOMID_SELF;
+
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+					  &alloc_unbound);
+	if (err == -ENOSYS)
+		goto out_err;
+
+	BUG_ON(err);
+	xen_store_evtchn = alloc_unbound.port;
+
+	return 0;
+
+ out_err:
+	if (page != 0)
+		free_page(page);
+	return err;
+}
+
+static int xenbus_resume_cb(struct notifier_block *nb,
+			    unsigned long action, void *data)
+{
+	int err = 0;
+
+	if (xen_hvm_domain()) {
+		uint64_t v = 0;
+
+		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+		if (!err && v)
+			xen_store_evtchn = v;
+		else
+			pr_warn("Cannot update xenstore event channel: %d\n",
+				err);
+	} else
+		xen_store_evtchn = xen_start_info->store_evtchn;
+
+	return err;
+}
+
+static struct notifier_block xenbus_resume_nb = {
+	.notifier_call = xenbus_resume_cb,
+};
+
+static irqreturn_t xenbus_late_init(int irq, void *unused)
+{
+	int err;
+	uint64_t v = 0;
+
+	err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+	if (err || !v || !~v)
+		return IRQ_HANDLED;
+	xen_store_gfn = (unsigned long)v;
+
+	wake_up(&xb_waitq);
+	return IRQ_HANDLED;
+}
+
+static int __init xenbus_init(void)
+{
+	int err;
+	uint64_t v = 0;
+	bool wait = false;
+	xen_store_domain_type = XS_UNKNOWN;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	xenbus_ring_ops_init();
+
+	if (xen_pv_domain())
+		xen_store_domain_type = XS_PV;
+	if (xen_hvm_domain())
+		xen_store_domain_type = XS_HVM;
+	if (xen_hvm_domain() && xen_initial_domain())
+		xen_store_domain_type = XS_LOCAL;
+	if (xen_pv_domain() && !xen_start_info->store_evtchn)
+		xen_store_domain_type = XS_LOCAL;
+	if (xen_pv_domain() && xen_start_info->store_evtchn)
+		xenstored_ready = 1;
+
+	switch (xen_store_domain_type) {
+	case XS_LOCAL:
+		err = xenstored_local_init();
+		if (err)
+			goto out_error;
+		xen_store_interface = gfn_to_virt(xen_store_gfn);
+		break;
+	case XS_PV:
+		xen_store_evtchn = xen_start_info->store_evtchn;
+		xen_store_gfn = xen_start_info->store_mfn;
+		xen_store_interface = gfn_to_virt(xen_store_gfn);
+		break;
+	case XS_HVM:
+		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+		if (err)
+			goto out_error;
+		xen_store_evtchn = (int)v;
+		err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+		if (err)
+			goto out_error;
+		/*
+		 * Uninitialized hvm_params are zero and return no error.
+		 * Although it is theoretically possible to have
+		 * HVM_PARAM_STORE_PFN set to zero on purpose, in reality it is
+		 * not zero when valid. If zero, it means that Xenstore hasn't
+		 * been properly initialized. Instead of attempting to map a
+		 * wrong guest physical address return error.
+		 *
+		 * Also recognize all bits set as an invalid/uninitialized value.
+		 */
+		if (!v) {
+			err = -ENOENT;
+			goto out_error;
+		}
+		if (v == ~0ULL) {
+			wait = true;
+		} else {
+			/* Avoid truncation on 32-bit. */
+#if BITS_PER_LONG == 32
+			if (v > ULONG_MAX) {
+				pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n",
+				       __func__, v);
+				err = -EINVAL;
+				goto out_error;
+			}
+#endif
+			xen_store_gfn = (unsigned long)v;
+			xen_store_interface =
+				memremap(xen_store_gfn << XEN_PAGE_SHIFT,
+					 XEN_PAGE_SIZE, MEMREMAP_WB);
+			if (xen_store_interface->connection != XENSTORE_CONNECTED)
+				wait = true;
+		}
+		if (wait) {
+			err = bind_evtchn_to_irqhandler(xen_store_evtchn,
+							xenbus_late_init,
+							0, "xenstore_late_init",
+							&xb_waitq);
+			if (err < 0) {
+				pr_err("xenstore_late_init couldn't bind irq err=%d\n",
+				       err);
+				goto out_error;
+			}
+
+			xs_init_irq = err;
+		}
+		break;
+	default:
+		pr_warn("Xenstore state unknown\n");
+		break;
+	}
+
+	/*
+	 * HVM domains may not have a functional callback yet. In that
+	 * case let xs_init() be called from xenbus_probe(), which will
+	 * get invoked at an appropriate time.
+	 */
+	if (xen_store_domain_type != XS_HVM) {
+		err = xs_init();
+		if (err) {
+			pr_warn("Error initializing xenstore comms: %i\n", err);
+			goto out_error;
+		}
+	}
+
+	if ((xen_store_domain_type != XS_LOCAL) &&
+	    (xen_store_domain_type != XS_UNKNOWN))
+		xen_resume_notifier_register(&xenbus_resume_nb);
+
+#ifdef CONFIG_XEN_COMPAT_XENFS
+	/*
+	 * Create xenfs mountpoint in /proc for compatibility with
+	 * utilities that expect to find "xenbus" under "/proc/xen".
+	 */
+	proc_create_mount_point("xen");
+#endif
+	return 0;
+
+out_error:
+	xen_store_domain_type = XS_UNKNOWN;
+	return err;
+}
+
+postcore_initcall(xenbus_init);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
new file mode 100644
index 000000000..9c09f89d8
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -0,0 +1,313 @@
+/******************************************************************************
+ * Talks to Xen Store to figure out what devices we have (backend half).
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
+ * Copyright (C) 2005, 2006 XenSource Ltd
+ * Copyright (C) 2007 Solarflare Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define DPRINTK(fmt, ...)				\
+	pr_debug("(%s:%d) " fmt "\n",			\
+		 __func__, __LINE__, ##__VA_ARGS__)
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <linux/semaphore.h>
+
+#include <asm/page.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/hypervisor.h>
+#include <xen/xenbus.h>
+#include <xen/features.h>
+
+#include "xenbus.h"
+
+/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
+static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
+{
+	int domid, err;
+	const char *devid, *type, *frontend;
+	unsigned int typelen;
+
+	type = strchr(nodename, '/');
+	if (!type)
+		return -EINVAL;
+	type++;
+	typelen = strcspn(type, "/");
+	if (!typelen || type[typelen] != '/')
+		return -EINVAL;
+
+	devid = strrchr(nodename, '/') + 1;
+
+	err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
+			    "frontend", NULL, &frontend,
+			    NULL);
+	if (err)
+		return err;
+	if (strlen(frontend) == 0)
+		err = -ERANGE;
+	if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
+		err = -ENOENT;
+	kfree(frontend);
+
+	if (err)
+		return err;
+
+	if (snprintf(bus_id, XEN_BUS_ID_SIZE, "%.*s-%i-%s",
+		     typelen, type, domid, devid) >= XEN_BUS_ID_SIZE)
+		return -ENOSPC;
+	return 0;
+}
+
+static int xenbus_uevent_backend(struct device *dev,
+				 struct kobj_uevent_env *env)
+{
+	struct xenbus_device *xdev;
+	struct xenbus_driver *drv;
+	struct xen_bus_type *bus;
+
+	DPRINTK("");
+
+	if (dev == NULL)
+		return -ENODEV;
+
+	xdev = to_xenbus_device(dev);
+	bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
+
+	if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
+		return -ENOMEM;
+
+	/* stuff we want to pass to /sbin/hotplug */
+	if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "XENBUS_BASE_PATH=%s", bus->root))
+		return -ENOMEM;
+
+	if (dev->driver) {
+		drv = to_xenbus_driver(dev->driver);
+		if (drv && drv->uevent)
+			return drv->uevent(xdev, env);
+	}
+
+	return 0;
+}
+
+/* backend/<typename>/<frontend-uuid>/<name> */
+static int xenbus_probe_backend_unit(struct xen_bus_type *bus,
+				     const char *dir,
+				     const char *type,
+				     const char *name)
+{
+	char *nodename;
+	int err;
+
+	nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
+	if (!nodename)
+		return -ENOMEM;
+
+	DPRINTK("%s\n", nodename);
+
+	err = xenbus_probe_node(bus, type, nodename);
+	kfree(nodename);
+	return err;
+}
+
+/* backend/<typename>/<frontend-domid> */
+static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type,
+				const char *domid)
+{
+	char *nodename;
+	int err = 0;
+	char **dir;
+	unsigned int i, dir_n = 0;
+
+	DPRINTK("");
+
+	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, domid);
+	if (!nodename)
+		return -ENOMEM;
+
+	dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
+	if (IS_ERR(dir)) {
+		kfree(nodename);
+		return PTR_ERR(dir);
+	}
+
+	for (i = 0; i < dir_n; i++) {
+		err = xenbus_probe_backend_unit(bus, nodename, type, dir[i]);
+		if (err)
+			break;
+	}
+	kfree(dir);
+	kfree(nodename);
+	return err;
+}
+
+static bool frontend_will_handle(struct xenbus_watch *watch,
+				 const char *path, const char *token)
+{
+	return watch->nr_pending == 0;
+}
+
+static void frontend_changed(struct xenbus_watch *watch,
+			     const char *path, const char *token)
+{
+	xenbus_otherend_changed(watch, path, token, 0);
+}
+
+static struct xen_bus_type xenbus_backend = {
+	.root = "backend",
+	.levels = 3,		/* backend/type/<frontend>/<id> */
+	.get_bus_id = backend_bus_id,
+	.probe = xenbus_probe_backend,
+	.otherend_will_handle = frontend_will_handle,
+	.otherend_changed = frontend_changed,
+	.bus = {
+		.name		= "xen-backend",
+		.match		= xenbus_match,
+		.uevent		= xenbus_uevent_backend,
+		.probe		= xenbus_dev_probe,
+		.remove		= xenbus_dev_remove,
+		.dev_groups	= xenbus_dev_groups,
+	},
+};
+
+static void backend_changed(struct xenbus_watch *watch,
+			    const char *path, const char *token)
+{
+	DPRINTK("");
+
+	xenbus_dev_changed(path, &xenbus_backend);
+}
+
+static struct xenbus_watch be_watch = {
+	.node = "backend",
+	.callback = backend_changed,
+};
+
+static int read_frontend_details(struct xenbus_device *xendev)
+{
+	return xenbus_read_otherend_details(xendev, "frontend-id", "frontend");
+}
+
+int xenbus_dev_is_online(struct xenbus_device *dev)
+{
+	return !!xenbus_read_unsigned(dev->nodename, "online", 0);
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
+
+int __xenbus_register_backend(struct xenbus_driver *drv, struct module *owner,
+			      const char *mod_name)
+{
+	drv->read_otherend_details = read_frontend_details;
+
+	return xenbus_register_driver_common(drv, &xenbus_backend,
+					     owner, mod_name);
+}
+EXPORT_SYMBOL_GPL(__xenbus_register_backend);
+
+static int backend_probe_and_watch(struct notifier_block *notifier,
+				   unsigned long event,
+				   void *data)
+{
+	/* Enumerate devices in xenstore and watch for changes. */
+	xenbus_probe_devices(&xenbus_backend);
+	register_xenbus_watch(&be_watch);
+
+	return NOTIFY_DONE;
+}
+
+static int backend_reclaim_memory(struct device *dev, void *data)
+{
+	const struct xenbus_driver *drv;
+	struct xenbus_device *xdev;
+
+	if (!dev->driver)
+		return 0;
+	drv = to_xenbus_driver(dev->driver);
+	if (drv && drv->reclaim_memory) {
+		xdev = to_xenbus_device(dev);
+		if (down_trylock(&xdev->reclaim_sem))
+			return 0;
+		drv->reclaim_memory(xdev);
+		up(&xdev->reclaim_sem);
+	}
+	return 0;
+}
+
+/*
+ * Returns 0 always because we are using shrinker to only detect memory
+ * pressure.
+ */
+static unsigned long backend_shrink_memory_count(struct shrinker *shrinker,
+				struct shrink_control *sc)
+{
+	bus_for_each_dev(&xenbus_backend.bus, NULL, NULL,
+			backend_reclaim_memory);
+	return 0;
+}
+
+static struct shrinker backend_memory_shrinker = {
+	.count_objects = backend_shrink_memory_count,
+	.seeks = DEFAULT_SEEKS,
+};
+
+static int __init xenbus_probe_backend_init(void)
+{
+	static struct notifier_block xenstore_notifier = {
+		.notifier_call = backend_probe_and_watch
+	};
+	int err;
+
+	DPRINTK("");
+
+	/* Register ourselves with the kernel bus subsystem */
+	err = bus_register(&xenbus_backend.bus);
+	if (err)
+		return err;
+
+	register_xenstore_notifier(&xenstore_notifier);
+
+	if (register_shrinker(&backend_memory_shrinker, "xen-backend"))
+		pr_warn("shrinker registration failed\n");
+
+	return 0;
+}
+subsys_initcall(xenbus_probe_backend_init);
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
new file mode 100644
index 000000000..f44d5a643
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define DPRINTK(fmt, ...)				\
+	pr_debug("(%s:%d) " fmt "\n",			\
+		 __func__, __LINE__, ##__VA_ARGS__)
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <linux/module.h>
+
+#include <asm/page.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/page.h>
+#include <xen/xen.h>
+
+#include <xen/platform_pci.h>
+
+#include "xenbus.h"
+
+
+
+/* device/<type>/<id> => <type>-<id> */
+static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
+{
+	nodename = strchr(nodename, '/');
+	if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) {
+		pr_warn("bad frontend %s\n", nodename);
+		return -EINVAL;
+	}
+
+	strscpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE);
+	if (!strchr(bus_id, '/')) {
+		pr_warn("bus_id %s no slash\n", bus_id);
+		return -EINVAL;
+	}
+	*strchr(bus_id, '/') = '-';
+	return 0;
+}
+
+/* device/<typename>/<name> */
+static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type,
+				 const char *name)
+{
+	char *nodename;
+	int err;
+
+	/* ignore console/0 */
+	if (!strncmp(type, "console", 7) && !strncmp(name, "0", 1)) {
+		DPRINTK("Ignoring buggy device entry console/0");
+		return 0;
+	}
+
+	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name);
+	if (!nodename)
+		return -ENOMEM;
+
+	DPRINTK("%s", nodename);
+
+	err = xenbus_probe_node(bus, type, nodename);
+	kfree(nodename);
+	return err;
+}
+
+static int xenbus_uevent_frontend(struct device *_dev,
+				  struct kobj_uevent_env *env)
+{
+	struct xenbus_device *dev = to_xenbus_device(_dev);
+
+	if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype))
+		return -ENOMEM;
+
+	return 0;
+}
+
+
+static void backend_changed(struct xenbus_watch *watch,
+			    const char *path, const char *token)
+{
+	xenbus_otherend_changed(watch, path, token, 1);
+}
+
+static void xenbus_frontend_delayed_resume(struct work_struct *w)
+{
+	struct xenbus_device *xdev = container_of(w, struct xenbus_device, work);
+
+	xenbus_dev_resume(&xdev->dev);
+}
+
+static int xenbus_frontend_dev_resume(struct device *dev)
+{
+	/*
+	 * If xenstored is running in this domain, we cannot access the backend
+	 * state at the moment, so we need to defer xenbus_dev_resume
+	 */
+	if (xen_store_domain_type == XS_LOCAL) {
+		struct xenbus_device *xdev = to_xenbus_device(dev);
+
+		schedule_work(&xdev->work);
+
+		return 0;
+	}
+
+	return xenbus_dev_resume(dev);
+}
+
+static int xenbus_frontend_dev_probe(struct device *dev)
+{
+	if (xen_store_domain_type == XS_LOCAL) {
+		struct xenbus_device *xdev = to_xenbus_device(dev);
+		INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume);
+	}
+
+	return xenbus_dev_probe(dev);
+}
+
+static void xenbus_frontend_dev_shutdown(struct device *_dev)
+{
+	struct xenbus_device *dev = to_xenbus_device(_dev);
+	unsigned long timeout = 5*HZ;
+
+	DPRINTK("%s", dev->nodename);
+
+	get_device(&dev->dev);
+	if (dev->state != XenbusStateConnected) {
+		pr_info("%s: %s: %s != Connected, skipping\n",
+			__func__, dev->nodename, xenbus_strstate(dev->state));
+		goto out;
+	}
+	xenbus_switch_state(dev, XenbusStateClosing);
+	timeout = wait_for_completion_timeout(&dev->down, timeout);
+	if (!timeout)
+		pr_info("%s: %s timeout closing device\n",
+			__func__, dev->nodename);
+ out:
+	put_device(&dev->dev);
+}
+
+static const struct dev_pm_ops xenbus_pm_ops = {
+	.suspend	= xenbus_dev_suspend,
+	.resume		= xenbus_frontend_dev_resume,
+	.freeze		= xenbus_dev_suspend,
+	.thaw		= xenbus_dev_cancel,
+	.restore	= xenbus_dev_resume,
+};
+
+static struct xen_bus_type xenbus_frontend = {
+	.root = "device",
+	.levels = 2,		/* device/type/<id> */
+	.get_bus_id = frontend_bus_id,
+	.probe = xenbus_probe_frontend,
+	.otherend_changed = backend_changed,
+	.bus = {
+		.name		= "xen",
+		.match		= xenbus_match,
+		.uevent		= xenbus_uevent_frontend,
+		.probe		= xenbus_frontend_dev_probe,
+		.remove		= xenbus_dev_remove,
+		.shutdown	= xenbus_frontend_dev_shutdown,
+		.dev_groups	= xenbus_dev_groups,
+
+		.pm		= &xenbus_pm_ops,
+	},
+};
+
+static void frontend_changed(struct xenbus_watch *watch,
+			     const char *path, const char *token)
+{
+	DPRINTK("");
+
+	xenbus_dev_changed(path, &xenbus_frontend);
+}
+
+
+/* We watch for devices appearing and vanishing. */
+static struct xenbus_watch fe_watch = {
+	.node = "device",
+	.callback = frontend_changed,
+};
+
+static int read_backend_details(struct xenbus_device *xendev)
+{
+	return xenbus_read_otherend_details(xendev, "backend-id", "backend");
+}
+
+static int is_device_connecting(struct device *dev, void *data, bool ignore_nonessential)
+{
+	struct xenbus_device *xendev = to_xenbus_device(dev);
+	struct device_driver *drv = data;
+	struct xenbus_driver *xendrv;
+
+	/*
+	 * A device with no driver will never connect. We care only about
+	 * devices which should currently be in the process of connecting.
+	 */
+	if (!dev->driver)
+		return 0;
+
+	/* Is this search limited to a particular driver? */
+	if (drv && (dev->driver != drv))
+		return 0;
+
+	xendrv = to_xenbus_driver(dev->driver);
+
+	if (ignore_nonessential && xendrv->not_essential)
+		return 0;
+
+	return (xendev->state < XenbusStateConnected ||
+		(xendev->state == XenbusStateConnected &&
+		 xendrv->is_ready && !xendrv->is_ready(xendev)));
+}
+static int essential_device_connecting(struct device *dev, void *data)
+{
+	return is_device_connecting(dev, data, true /* ignore PV[KBB+FB] */);
+}
+static int non_essential_device_connecting(struct device *dev, void *data)
+{
+	return is_device_connecting(dev, data, false);
+}
+
+static int exists_essential_connecting_device(struct device_driver *drv)
+{
+	return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+				essential_device_connecting);
+}
+static int exists_non_essential_connecting_device(struct device_driver *drv)
+{
+	return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+				non_essential_device_connecting);
+}
+
+static int print_device_status(struct device *dev, void *data)
+{
+	struct xenbus_device *xendev = to_xenbus_device(dev);
+	struct device_driver *drv = data;
+
+	/* Is this operation limited to a particular driver? */
+	if (drv && (dev->driver != drv))
+		return 0;
+
+	if (!dev->driver) {
+		/* Information only: is this too noisy? */
+		pr_info("Device with no driver: %s\n", xendev->nodename);
+	} else if (xendev->state < XenbusStateConnected) {
+		enum xenbus_state rstate = XenbusStateUnknown;
+		if (xendev->otherend)
+			rstate = xenbus_read_driver_state(xendev->otherend);
+		pr_warn("Timeout connecting to device: %s (local state %d, remote state %d)\n",
+			xendev->nodename, xendev->state, rstate);
+	}
+
+	return 0;
+}
+
+/* We only wait for device setup after most initcalls have run. */
+static int ready_to_wait_for_devices;
+
+static bool wait_loop(unsigned long start, unsigned int max_delay,
+		     unsigned int *seconds_waited)
+{
+	if (time_after(jiffies, start + (*seconds_waited+5)*HZ)) {
+		if (!*seconds_waited)
+			pr_warn("Waiting for devices to initialise: ");
+		*seconds_waited += 5;
+		pr_cont("%us...", max_delay - *seconds_waited);
+		if (*seconds_waited == max_delay) {
+			pr_cont("\n");
+			return true;
+		}
+	}
+
+	schedule_timeout_interruptible(HZ/10);
+
+	return false;
+}
+/*
+ * On a 5-minute timeout, wait for all devices currently configured.  We need
+ * to do this to guarantee that the filesystems and / or network devices
+ * needed for boot are available, before we can allow the boot to proceed.
+ *
+ * This needs to be on a late_initcall, to happen after the frontend device
+ * drivers have been initialised, but before the root fs is mounted.
+ *
+ * A possible improvement here would be to have the tools add a per-device
+ * flag to the store entry, indicating whether it is needed at boot time.
+ * This would allow people who knew what they were doing to accelerate their
+ * boot slightly, but of course needs tools or manual intervention to set up
+ * those flags correctly.
+ */
+static void wait_for_devices(struct xenbus_driver *xendrv)
+{
+	unsigned long start = jiffies;
+	struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
+	unsigned int seconds_waited = 0;
+
+	if (!ready_to_wait_for_devices || !xen_domain())
+		return;
+
+	while (exists_non_essential_connecting_device(drv))
+		if (wait_loop(start, 30, &seconds_waited))
+			break;
+
+	/* Skips PVKB and PVFB check.*/
+	while (exists_essential_connecting_device(drv))
+		if (wait_loop(start, 270, &seconds_waited))
+			break;
+
+	if (seconds_waited)
+		printk("\n");
+
+	bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+			 print_device_status);
+}
+
+int __xenbus_register_frontend(struct xenbus_driver *drv, struct module *owner,
+			       const char *mod_name)
+{
+	int ret;
+
+	drv->read_otherend_details = read_backend_details;
+
+	ret = xenbus_register_driver_common(drv, &xenbus_frontend,
+					    owner, mod_name);
+	if (ret)
+		return ret;
+
+	/* If this driver is loaded as a module wait for devices to attach. */
+	wait_for_devices(drv);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
+
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+static int backend_state;
+
+static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
+					const char *path, const char *token)
+{
+	if (xenbus_scanf(XBT_NIL, path, "", "%i",
+			 &backend_state) != 1)
+		backend_state = XenbusStateUnknown;
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+	       path, xenbus_strstate(backend_state));
+	wake_up(&backend_state_wq);
+}
+
+static void xenbus_reset_wait_for_backend(char *be, int expected)
+{
+	long timeout;
+	timeout = wait_event_interruptible_timeout(backend_state_wq,
+			backend_state == expected, 5 * HZ);
+	if (timeout <= 0)
+		pr_info("backend %s timed out\n", be);
+}
+
+/*
+ * Reset frontend if it is in Connected or Closed state.
+ * Wait for backend to catch up.
+ * State Connected happens during kdump, Closed after kexec.
+ */
+static void xenbus_reset_frontend(char *fe, char *be, int be_state)
+{
+	struct xenbus_watch be_watch;
+
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			be, xenbus_strstate(be_state));
+
+	memset(&be_watch, 0, sizeof(be_watch));
+	be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
+	if (!be_watch.node)
+		return;
+
+	be_watch.callback = xenbus_reset_backend_state_changed;
+	backend_state = XenbusStateUnknown;
+
+	pr_info("triggering reconnect on %s\n", be);
+	register_xenbus_watch(&be_watch);
+
+	/* fall through to forward backend to state XenbusStateInitialising */
+	switch (be_state) {
+	case XenbusStateConnected:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosing);
+		fallthrough;
+
+	case XenbusStateClosing:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosed);
+		fallthrough;
+
+	case XenbusStateClosed:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
+		xenbus_reset_wait_for_backend(be, XenbusStateInitWait);
+	}
+
+	unregister_xenbus_watch(&be_watch);
+	pr_info("reconnect done on %s\n", be);
+	kfree(be_watch.node);
+}
+
+static void xenbus_check_frontend(char *class, char *dev)
+{
+	int be_state, fe_state, err;
+	char *backend, *frontend;
+
+	frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
+	if (!frontend)
+		return;
+
+	err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state);
+	if (err != 1)
+		goto out;
+
+	switch (fe_state) {
+	case XenbusStateConnected:
+	case XenbusStateClosed:
+		printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
+				frontend, xenbus_strstate(fe_state));
+		backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
+		if (!backend || IS_ERR(backend))
+			goto out;
+		err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
+		if (err == 1)
+			xenbus_reset_frontend(frontend, backend, be_state);
+		kfree(backend);
+		break;
+	default:
+		break;
+	}
+out:
+	kfree(frontend);
+}
+
+static void xenbus_reset_state(void)
+{
+	char **devclass, **dev;
+	int devclass_n, dev_n;
+	int i, j;
+
+	devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n);
+	if (IS_ERR(devclass))
+		return;
+
+	for (i = 0; i < devclass_n; i++) {
+		dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n);
+		if (IS_ERR(dev))
+			continue;
+		for (j = 0; j < dev_n; j++)
+			xenbus_check_frontend(devclass[i], dev[j]);
+		kfree(dev);
+	}
+	kfree(devclass);
+}
+
+static int frontend_probe_and_watch(struct notifier_block *notifier,
+				   unsigned long event,
+				   void *data)
+{
+	/* reset devices in Connected or Closed state */
+	if (xen_hvm_domain())
+		xenbus_reset_state();
+	/* Enumerate devices in xenstore and watch for changes. */
+	xenbus_probe_devices(&xenbus_frontend);
+	register_xenbus_watch(&fe_watch);
+
+	return NOTIFY_DONE;
+}
+
+
+static int __init xenbus_probe_frontend_init(void)
+{
+	static struct notifier_block xenstore_notifier = {
+		.notifier_call = frontend_probe_and_watch
+	};
+	int err;
+
+	DPRINTK("");
+
+	/* Register ourselves with the kernel bus subsystem */
+	err = bus_register(&xenbus_frontend.bus);
+	if (err)
+		return err;
+
+	register_xenstore_notifier(&xenstore_notifier);
+
+	return 0;
+}
+subsys_initcall(xenbus_probe_frontend_init);
+
+#ifndef MODULE
+static int __init boot_wait_for_devices(void)
+{
+	if (!xen_has_pv_devices())
+		return -ENODEV;
+
+	ready_to_wait_for_devices = 1;
+	wait_for_devices(NULL);
+	return 0;
+}
+
+late_initcall(boot_wait_for_devices);
+#endif
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
new file mode 100644
index 000000000..12e02eb01
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -0,0 +1,956 @@
+/******************************************************************************
+ * xenbus_xs.c
+ *
+ * This is the kernel equivalent of the "xs" library.  We don't need everything
+ * and we use xenbus_comms for communication.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/unistd.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/uio.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/fcntl.h>
+#include <linux/kthread.h>
+#include <linux/reboot.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/xenbus.h>
+#include <xen/xen.h>
+#include "xenbus.h"
+
+/*
+ * Framework to protect suspend/resume handling against normal Xenstore
+ * message handling:
+ * During suspend/resume there must be no open transaction and no pending
+ * Xenstore request.
+ * New watch events happening in this time can be ignored by firing all watches
+ * after resume.
+ */
+
+/* Lock protecting enter/exit critical region. */
+static DEFINE_SPINLOCK(xs_state_lock);
+/* Number of users in critical region (protected by xs_state_lock). */
+static unsigned int xs_state_users;
+/* Suspend handler waiting or already active (protected by xs_state_lock)? */
+static int xs_suspend_active;
+/* Unique Xenstore request id (protected by xs_state_lock). */
+static uint32_t xs_request_id;
+
+/* Wait queue for all callers waiting for critical region to become usable. */
+static DECLARE_WAIT_QUEUE_HEAD(xs_state_enter_wq);
+/* Wait queue for suspend handling waiting for critical region being empty. */
+static DECLARE_WAIT_QUEUE_HEAD(xs_state_exit_wq);
+
+/* List of registered watches, and a lock to protect it. */
+static LIST_HEAD(watches);
+static DEFINE_SPINLOCK(watches_lock);
+
+/* List of pending watch callback events, and a lock to protect it. */
+static LIST_HEAD(watch_events);
+static DEFINE_SPINLOCK(watch_events_lock);
+
+/* Protect watch (de)register against save/restore. */
+static DECLARE_RWSEM(xs_watch_rwsem);
+
+/*
+ * Details of the xenwatch callback kernel thread. The thread waits on the
+ * watch_events_waitq for work to do (queued on watch_events list). When it
+ * wakes up it acquires the xenwatch_mutex before reading the list and
+ * carrying out work.
+ */
+static pid_t xenwatch_pid;
+static DEFINE_MUTEX(xenwatch_mutex);
+static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
+
+static void xs_suspend_enter(void)
+{
+	spin_lock(&xs_state_lock);
+	xs_suspend_active++;
+	spin_unlock(&xs_state_lock);
+	wait_event(xs_state_exit_wq, xs_state_users == 0);
+}
+
+static void xs_suspend_exit(void)
+{
+	xb_dev_generation_id++;
+	spin_lock(&xs_state_lock);
+	xs_suspend_active--;
+	spin_unlock(&xs_state_lock);
+	wake_up_all(&xs_state_enter_wq);
+}
+
+static uint32_t xs_request_enter(struct xb_req_data *req)
+{
+	uint32_t rq_id;
+
+	req->type = req->msg.type;
+
+	spin_lock(&xs_state_lock);
+
+	while (!xs_state_users && xs_suspend_active) {
+		spin_unlock(&xs_state_lock);
+		wait_event(xs_state_enter_wq, xs_suspend_active == 0);
+		spin_lock(&xs_state_lock);
+	}
+
+	if (req->type == XS_TRANSACTION_START && !req->user_req)
+		xs_state_users++;
+	xs_state_users++;
+	rq_id = xs_request_id++;
+
+	spin_unlock(&xs_state_lock);
+
+	return rq_id;
+}
+
+void xs_request_exit(struct xb_req_data *req)
+{
+	spin_lock(&xs_state_lock);
+	xs_state_users--;
+	if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) ||
+	    (req->type == XS_TRANSACTION_END && !req->user_req &&
+	     !WARN_ON_ONCE(req->msg.type == XS_ERROR &&
+			   !strcmp(req->body, "ENOENT"))))
+		xs_state_users--;
+	spin_unlock(&xs_state_lock);
+
+	if (xs_suspend_active && !xs_state_users)
+		wake_up(&xs_state_exit_wq);
+}
+
+static int get_error(const char *errorstring)
+{
+	unsigned int i;
+
+	for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) {
+		if (i == ARRAY_SIZE(xsd_errors) - 1) {
+			pr_warn("xen store gave: unknown error %s\n",
+				errorstring);
+			return EINVAL;
+		}
+	}
+	return xsd_errors[i].errnum;
+}
+
+static bool xenbus_ok(void)
+{
+	switch (xen_store_domain_type) {
+	case XS_LOCAL:
+		switch (system_state) {
+		case SYSTEM_POWER_OFF:
+		case SYSTEM_RESTART:
+		case SYSTEM_HALT:
+			return false;
+		default:
+			break;
+		}
+		return true;
+	case XS_PV:
+	case XS_HVM:
+		/* FIXME: Could check that the remote domain is alive,
+		 * but it is normally initial domain. */
+		return true;
+	default:
+		break;
+	}
+	return false;
+}
+
+static bool test_reply(struct xb_req_data *req)
+{
+	if (req->state == xb_req_state_got_reply || !xenbus_ok()) {
+		/* read req->state before all other fields */
+		virt_rmb();
+		return true;
+	}
+
+	/* Make sure to reread req->state each time. */
+	barrier();
+
+	return false;
+}
+
+static void *read_reply(struct xb_req_data *req)
+{
+	do {
+		wait_event(req->wq, test_reply(req));
+
+		if (!xenbus_ok())
+			/*
+			 * If we are in the process of being shut-down there is
+			 * no point of trying to contact XenBus - it is either
+			 * killed (xenstored application) or the other domain
+			 * has been killed or is unreachable.
+			 */
+			return ERR_PTR(-EIO);
+		if (req->err)
+			return ERR_PTR(req->err);
+
+	} while (req->state != xb_req_state_got_reply);
+
+	return req->body;
+}
+
+static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg)
+{
+	bool notify;
+
+	req->msg = *msg;
+	req->err = 0;
+	req->state = xb_req_state_queued;
+	init_waitqueue_head(&req->wq);
+
+	/* Save the caller req_id and restore it later in the reply */
+	req->caller_req_id = req->msg.req_id;
+	req->msg.req_id = xs_request_enter(req);
+
+	mutex_lock(&xb_write_mutex);
+	list_add_tail(&req->list, &xb_write_list);
+	notify = list_is_singular(&xb_write_list);
+	mutex_unlock(&xb_write_mutex);
+
+	if (notify)
+		wake_up(&xb_waitq);
+}
+
+static void *xs_wait_for_reply(struct xb_req_data *req, struct xsd_sockmsg *msg)
+{
+	void *ret;
+
+	ret = read_reply(req);
+
+	xs_request_exit(req);
+
+	msg->type = req->msg.type;
+	msg->len = req->msg.len;
+
+	mutex_lock(&xb_write_mutex);
+	if (req->state == xb_req_state_queued ||
+	    req->state == xb_req_state_wait_reply)
+		req->state = xb_req_state_aborted;
+	else
+		kfree(req);
+	mutex_unlock(&xb_write_mutex);
+
+	return ret;
+}
+
+static void xs_wake_up(struct xb_req_data *req)
+{
+	wake_up(&req->wq);
+}
+
+int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par)
+{
+	struct xb_req_data *req;
+	struct kvec *vec;
+
+	req = kmalloc(sizeof(*req) + sizeof(*vec), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	vec = (struct kvec *)(req + 1);
+	vec->iov_len = msg->len;
+	vec->iov_base = msg + 1;
+
+	req->vec = vec;
+	req->num_vecs = 1;
+	req->cb = xenbus_dev_queue_reply;
+	req->par = par;
+	req->user_req = true;
+
+	xs_send(req, msg);
+
+	return 0;
+}
+EXPORT_SYMBOL(xenbus_dev_request_and_reply);
+
+/* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
+static void *xs_talkv(struct xenbus_transaction t,
+		      enum xsd_sockmsg_type type,
+		      const struct kvec *iovec,
+		      unsigned int num_vecs,
+		      unsigned int *len)
+{
+	struct xb_req_data *req;
+	struct xsd_sockmsg msg;
+	void *ret = NULL;
+	unsigned int i;
+	int err;
+
+	req = kmalloc(sizeof(*req), GFP_NOIO | __GFP_HIGH);
+	if (!req)
+		return ERR_PTR(-ENOMEM);
+
+	req->vec = iovec;
+	req->num_vecs = num_vecs;
+	req->cb = xs_wake_up;
+	req->user_req = false;
+
+	msg.req_id = 0;
+	msg.tx_id = t.id;
+	msg.type = type;
+	msg.len = 0;
+	for (i = 0; i < num_vecs; i++)
+		msg.len += iovec[i].iov_len;
+
+	xs_send(req, &msg);
+
+	ret = xs_wait_for_reply(req, &msg);
+	if (len)
+		*len = msg.len;
+
+	if (IS_ERR(ret))
+		return ret;
+
+	if (msg.type == XS_ERROR) {
+		err = get_error(ret);
+		kfree(ret);
+		return ERR_PTR(-err);
+	}
+
+	if (msg.type != type) {
+		pr_warn_ratelimited("unexpected type [%d], expected [%d]\n",
+				    msg.type, type);
+		kfree(ret);
+		return ERR_PTR(-EINVAL);
+	}
+	return ret;
+}
+
+/* Simplified version of xs_talkv: single message. */
+static void *xs_single(struct xenbus_transaction t,
+		       enum xsd_sockmsg_type type,
+		       const char *string,
+		       unsigned int *len)
+{
+	struct kvec iovec;
+
+	iovec.iov_base = (void *)string;
+	iovec.iov_len = strlen(string) + 1;
+	return xs_talkv(t, type, &iovec, 1, len);
+}
+
+/* Many commands only need an ack, don't care what it says. */
+static int xs_error(char *reply)
+{
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+	kfree(reply);
+	return 0;
+}
+
+static unsigned int count_strings(const char *strings, unsigned int len)
+{
+	unsigned int num;
+	const char *p;
+
+	for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
+		num++;
+
+	return num;
+}
+
+/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */
+static char *join(const char *dir, const char *name)
+{
+	char *buffer;
+
+	if (strlen(name) == 0)
+		buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s", dir);
+	else
+		buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", dir, name);
+	return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
+}
+
+static char **split(char *strings, unsigned int len, unsigned int *num)
+{
+	char *p, **ret;
+
+	/* Count the strings. */
+	*num = count_strings(strings, len);
+
+	/* Transfer to one big alloc for easy freeing. */
+	ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH);
+	if (!ret) {
+		kfree(strings);
+		return ERR_PTR(-ENOMEM);
+	}
+	memcpy(&ret[*num], strings, len);
+	kfree(strings);
+
+	strings = (char *)&ret[*num];
+	for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
+		ret[(*num)++] = p;
+
+	return ret;
+}
+
+char **xenbus_directory(struct xenbus_transaction t,
+			const char *dir, const char *node, unsigned int *num)
+{
+	char *strings, *path;
+	unsigned int len;
+
+	path = join(dir, node);
+	if (IS_ERR(path))
+		return (char **)path;
+
+	strings = xs_single(t, XS_DIRECTORY, path, &len);
+	kfree(path);
+	if (IS_ERR(strings))
+		return (char **)strings;
+
+	return split(strings, len, num);
+}
+EXPORT_SYMBOL_GPL(xenbus_directory);
+
+/* Check if a path exists. Return 1 if it does. */
+int xenbus_exists(struct xenbus_transaction t,
+		  const char *dir, const char *node)
+{
+	char **d;
+	int dir_n;
+
+	d = xenbus_directory(t, dir, node, &dir_n);
+	if (IS_ERR(d))
+		return 0;
+	kfree(d);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(xenbus_exists);
+
+/* Get the value of a single file.
+ * Returns a kmalloced value: call free() on it after use.
+ * len indicates length in bytes.
+ */
+void *xenbus_read(struct xenbus_transaction t,
+		  const char *dir, const char *node, unsigned int *len)
+{
+	char *path;
+	void *ret;
+
+	path = join(dir, node);
+	if (IS_ERR(path))
+		return (void *)path;
+
+	ret = xs_single(t, XS_READ, path, len);
+	kfree(path);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_read);
+
+/* Write the value of a single file.
+ * Returns -err on failure.
+ */
+int xenbus_write(struct xenbus_transaction t,
+		 const char *dir, const char *node, const char *string)
+{
+	const char *path;
+	struct kvec iovec[2];
+	int ret;
+
+	path = join(dir, node);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	iovec[0].iov_base = (void *)path;
+	iovec[0].iov_len = strlen(path) + 1;
+	iovec[1].iov_base = (void *)string;
+	iovec[1].iov_len = strlen(string);
+
+	ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+	kfree(path);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_write);
+
+/* Create a new directory. */
+int xenbus_mkdir(struct xenbus_transaction t,
+		 const char *dir, const char *node)
+{
+	char *path;
+	int ret;
+
+	path = join(dir, node);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
+	kfree(path);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_mkdir);
+
+/* Destroy a file or directory (directories must be empty). */
+int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
+{
+	char *path;
+	int ret;
+
+	path = join(dir, node);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	ret = xs_error(xs_single(t, XS_RM, path, NULL));
+	kfree(path);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_rm);
+
+/* Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end.
+ */
+int xenbus_transaction_start(struct xenbus_transaction *t)
+{
+	char *id_str;
+
+	id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
+	if (IS_ERR(id_str))
+		return PTR_ERR(id_str);
+
+	t->id = simple_strtoul(id_str, NULL, 0);
+	kfree(id_str);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_transaction_start);
+
+/* End a transaction.
+ * If abandon is true, transaction is discarded instead of committed.
+ */
+int xenbus_transaction_end(struct xenbus_transaction t, int abort)
+{
+	char abortstr[2];
+
+	if (abort)
+		strcpy(abortstr, "F");
+	else
+		strcpy(abortstr, "T");
+
+	return xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
+}
+EXPORT_SYMBOL_GPL(xenbus_transaction_end);
+
+/* Single read and scanf: returns -errno or num scanned. */
+int xenbus_scanf(struct xenbus_transaction t,
+		 const char *dir, const char *node, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+	char *val;
+
+	val = xenbus_read(t, dir, node, NULL);
+	if (IS_ERR(val))
+		return PTR_ERR(val);
+
+	va_start(ap, fmt);
+	ret = vsscanf(val, fmt, ap);
+	va_end(ap);
+	kfree(val);
+	/* Distinctive errno. */
+	if (ret == 0)
+		return -ERANGE;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_scanf);
+
+/* Read an (optional) unsigned value. */
+unsigned int xenbus_read_unsigned(const char *dir, const char *node,
+				  unsigned int default_val)
+{
+	unsigned int val;
+	int ret;
+
+	ret = xenbus_scanf(XBT_NIL, dir, node, "%u", &val);
+	if (ret <= 0)
+		val = default_val;
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(xenbus_read_unsigned);
+
+/* Single printf and write: returns -errno or 0. */
+int xenbus_printf(struct xenbus_transaction t,
+		  const char *dir, const char *node, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+	char *buf;
+
+	va_start(ap, fmt);
+	buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap);
+	va_end(ap);
+
+	if (!buf)
+		return -ENOMEM;
+
+	ret = xenbus_write(t, dir, node, buf);
+
+	kfree(buf);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_printf);
+
+/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
+int xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
+{
+	va_list ap;
+	const char *name;
+	int ret = 0;
+
+	va_start(ap, dir);
+	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
+		const char *fmt = va_arg(ap, char *);
+		void *result = va_arg(ap, void *);
+		char *p;
+
+		p = xenbus_read(t, dir, name, NULL);
+		if (IS_ERR(p)) {
+			ret = PTR_ERR(p);
+			break;
+		}
+		if (fmt) {
+			if (sscanf(p, fmt, result) == 0)
+				ret = -EINVAL;
+			kfree(p);
+		} else
+			*(char **)result = p;
+	}
+	va_end(ap);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_gather);
+
+static int xs_watch(const char *path, const char *token)
+{
+	struct kvec iov[2];
+
+	iov[0].iov_base = (void *)path;
+	iov[0].iov_len = strlen(path) + 1;
+	iov[1].iov_base = (void *)token;
+	iov[1].iov_len = strlen(token) + 1;
+
+	return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov,
+				 ARRAY_SIZE(iov), NULL));
+}
+
+static int xs_unwatch(const char *path, const char *token)
+{
+	struct kvec iov[2];
+
+	iov[0].iov_base = (char *)path;
+	iov[0].iov_len = strlen(path) + 1;
+	iov[1].iov_base = (char *)token;
+	iov[1].iov_len = strlen(token) + 1;
+
+	return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov,
+				 ARRAY_SIZE(iov), NULL));
+}
+
+static struct xenbus_watch *find_watch(const char *token)
+{
+	struct xenbus_watch *i, *cmp;
+
+	cmp = (void *)simple_strtoul(token, NULL, 16);
+
+	list_for_each_entry(i, &watches, list)
+		if (i == cmp)
+			return i;
+
+	return NULL;
+}
+
+int xs_watch_msg(struct xs_watch_event *event)
+{
+	if (count_strings(event->body, event->len) != 2) {
+		kfree(event);
+		return -EINVAL;
+	}
+	event->path = (const char *)event->body;
+	event->token = (const char *)strchr(event->body, '\0') + 1;
+
+	spin_lock(&watches_lock);
+	event->handle = find_watch(event->token);
+	if (event->handle != NULL &&
+			(!event->handle->will_handle ||
+			 event->handle->will_handle(event->handle,
+				 event->path, event->token))) {
+		spin_lock(&watch_events_lock);
+		list_add_tail(&event->list, &watch_events);
+		event->handle->nr_pending++;
+		wake_up(&watch_events_waitq);
+		spin_unlock(&watch_events_lock);
+	} else
+		kfree(event);
+	spin_unlock(&watches_lock);
+
+	return 0;
+}
+
+/*
+ * Certain older XenBus toolstack cannot handle reading values that are
+ * not populated. Some Xen 3.4 installation are incapable of doing this
+ * so if we are running on anything older than 4 do not attempt to read
+ * control/platform-feature-xs_reset_watches.
+ */
+static bool xen_strict_xenbus_quirk(void)
+{
+#ifdef CONFIG_X86
+	uint32_t eax, ebx, ecx, edx, base;
+
+	base = xen_cpuid_base();
+	cpuid(base + 1, &eax, &ebx, &ecx, &edx);
+
+	if ((eax >> 16) < 4)
+		return true;
+#endif
+	return false;
+
+}
+static void xs_reset_watches(void)
+{
+	int err;
+
+	if (!xen_hvm_domain() || xen_initial_domain())
+		return;
+
+	if (xen_strict_xenbus_quirk())
+		return;
+
+	if (!xenbus_read_unsigned("control",
+				  "platform-feature-xs_reset_watches", 0))
+		return;
+
+	err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
+	if (err && err != -EEXIST)
+		pr_warn("xs_reset_watches failed: %d\n", err);
+}
+
+/* Register callback to watch this node. */
+int register_xenbus_watch(struct xenbus_watch *watch)
+{
+	/* Pointer in ascii is the token. */
+	char token[sizeof(watch) * 2 + 1];
+	int err;
+
+	sprintf(token, "%lX", (long)watch);
+
+	watch->nr_pending = 0;
+
+	down_read(&xs_watch_rwsem);
+
+	spin_lock(&watches_lock);
+	BUG_ON(find_watch(token));
+	list_add(&watch->list, &watches);
+	spin_unlock(&watches_lock);
+
+	err = xs_watch(watch->node, token);
+
+	if (err) {
+		spin_lock(&watches_lock);
+		list_del(&watch->list);
+		spin_unlock(&watches_lock);
+	}
+
+	up_read(&xs_watch_rwsem);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(register_xenbus_watch);
+
+void unregister_xenbus_watch(struct xenbus_watch *watch)
+{
+	struct xs_watch_event *event, *tmp;
+	char token[sizeof(watch) * 2 + 1];
+	int err;
+
+	sprintf(token, "%lX", (long)watch);
+
+	down_read(&xs_watch_rwsem);
+
+	spin_lock(&watches_lock);
+	BUG_ON(!find_watch(token));
+	list_del(&watch->list);
+	spin_unlock(&watches_lock);
+
+	err = xs_unwatch(watch->node, token);
+	if (err)
+		pr_warn("Failed to release watch %s: %i\n", watch->node, err);
+
+	up_read(&xs_watch_rwsem);
+
+	/* Make sure there are no callbacks running currently (unless
+	   its us) */
+	if (current->pid != xenwatch_pid)
+		mutex_lock(&xenwatch_mutex);
+
+	/* Cancel pending watch events. */
+	spin_lock(&watch_events_lock);
+	if (watch->nr_pending) {
+		list_for_each_entry_safe(event, tmp, &watch_events, list) {
+			if (event->handle != watch)
+				continue;
+			list_del(&event->list);
+			kfree(event);
+		}
+		watch->nr_pending = 0;
+	}
+	spin_unlock(&watch_events_lock);
+
+	if (current->pid != xenwatch_pid)
+		mutex_unlock(&xenwatch_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
+
+void xs_suspend(void)
+{
+	xs_suspend_enter();
+
+	down_write(&xs_watch_rwsem);
+	mutex_lock(&xs_response_mutex);
+}
+
+void xs_resume(void)
+{
+	struct xenbus_watch *watch;
+	char token[sizeof(watch) * 2 + 1];
+
+	xb_init_comms();
+
+	mutex_unlock(&xs_response_mutex);
+
+	xs_suspend_exit();
+
+	/* No need for watches_lock: the xs_watch_rwsem is sufficient. */
+	list_for_each_entry(watch, &watches, list) {
+		sprintf(token, "%lX", (long)watch);
+		xs_watch(watch->node, token);
+	}
+
+	up_write(&xs_watch_rwsem);
+}
+
+void xs_suspend_cancel(void)
+{
+	mutex_unlock(&xs_response_mutex);
+	up_write(&xs_watch_rwsem);
+
+	xs_suspend_exit();
+}
+
+static int xenwatch_thread(void *unused)
+{
+	struct xs_watch_event *event;
+
+	xenwatch_pid = current->pid;
+
+	for (;;) {
+		wait_event_interruptible(watch_events_waitq,
+					 !list_empty(&watch_events));
+
+		if (kthread_should_stop())
+			break;
+
+		mutex_lock(&xenwatch_mutex);
+
+		spin_lock(&watch_events_lock);
+		event = list_first_entry_or_null(&watch_events,
+				struct xs_watch_event, list);
+		if (event) {
+			list_del(&event->list);
+			event->handle->nr_pending--;
+		}
+		spin_unlock(&watch_events_lock);
+
+		if (event) {
+			event->handle->callback(event->handle, event->path,
+						event->token);
+			kfree(event);
+		}
+
+		mutex_unlock(&xenwatch_mutex);
+	}
+
+	return 0;
+}
+
+/*
+ * Wake up all threads waiting for a xenstore reply. In case of shutdown all
+ * pending replies will be marked as "aborted" in order to let the waiters
+ * return in spite of xenstore possibly no longer being able to reply. This
+ * will avoid blocking shutdown by a thread waiting for xenstore but being
+ * necessary for shutdown processing to proceed.
+ */
+static int xs_reboot_notify(struct notifier_block *nb,
+			    unsigned long code, void *unused)
+{
+	struct xb_req_data *req;
+
+	mutex_lock(&xb_write_mutex);
+	list_for_each_entry(req, &xs_reply_list, list)
+		wake_up(&req->wq);
+	list_for_each_entry(req, &xb_write_list, list)
+		wake_up(&req->wq);
+	mutex_unlock(&xb_write_mutex);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block xs_reboot_nb = {
+	.notifier_call = xs_reboot_notify,
+};
+
+int xs_init(void)
+{
+	int err;
+	struct task_struct *task;
+
+	register_reboot_notifier(&xs_reboot_nb);
+
+	/* Initialize the shared memory rings to talk to xenstored */
+	err = xb_init_comms();
+	if (err)
+		return err;
+
+	task = kthread_run(xenwatch_thread, NULL, "xenwatch");
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+
+	/* shutdown watches for kexec boot */
+	xs_reset_watches();
+
+	return 0;
+}