51 files changed, 4102 insertions, 0 deletions
diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore
new file mode 100644
index 000000000..1cfbb0157
--- /dev/null
+++ b/tools/virtio/.gitignore
@@ -0,0 +1,3 @@
+*.d
+virtio_test
+vringh_test
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
new file mode 100644
index 000000000..8e2a90811
--- /dev/null
+++ b/tools/virtio/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+all: test mod
+test: virtio_test vringh_test
+virtio_test: virtio_ring.o virtio_test.o
+vringh_test: vringh_test.o vringh.o virtio_ring.o
+
+CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
+vpath %.c ../../drivers/virtio ../../drivers/vhost
+mod:
+	${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V}
+.PHONY: all test mod clean
+clean:
+	${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
+              vhost_test/Module.symvers vhost_test/modules.order *.d
+-include *.d
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
new file mode 100644
index 000000000..d0351f83a
--- /dev/null
+++ b/tools/virtio/asm/barrier.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if defined(__i386__) || defined(__x86_64__)
+#define barrier() asm volatile("" ::: "memory")
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value)  do { \
+	typeof(var) virt_store_mb_value = (value); \
+	__atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+			  __ATOMIC_SEQ_CST); \
+	barrier(); \
+} while (0);
+/* Weak barriers should be used. If not - it's a bug */
+# define mb() abort()
+# define dma_rmb() abort()
+# define dma_wmb() abort()
+#else
+#error Please fill in barrier macros
+#endif
+
diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h
new file mode 100644
index 000000000..b14c2c3b6
--- /dev/null
+++ b/tools/virtio/linux/bug.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BUG_H
+#define BUG_H
+
+#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
+
+#define BUILD_BUG_ON(x)
+
+#define BUG() abort()
+
+#endif /* BUG_H */
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644
index 000000000..903dc9c4b
--- /dev/null
+++ b/tools/virtio/linux/compiler.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+	(*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
+
+#endif
diff --git a/tools/virtio/linux/device.h b/tools/virtio/linux/device.h
new file mode 100644
index 000000000..4ad7e1df0
--- /dev/null
+++ b/tools/virtio/linux/device.h
@@ -0,0 +1,2 @@
+#ifndef LINUX_DEVICE_H
+#endif
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
new file mode 100644
index 000000000..f91aeb5fe
--- /dev/null
+++ b/tools/virtio/linux/dma-mapping.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_DMA_MAPPING_H
+#define _LINUX_DMA_MAPPING_H
+
+#ifdef CONFIG_HAS_DMA
+# error Virtio userspace code does not support CONFIG_HAS_DMA
+#endif
+
+enum dma_data_direction {
+	DMA_BIDIRECTIONAL = 0,
+	DMA_TO_DEVICE = 1,
+	DMA_FROM_DEVICE = 2,
+	DMA_NONE = 3,
+};
+
+#define dma_alloc_coherent(d, s, hp, f) ({ \
+	void *__dma_alloc_coherent_p = kmalloc((s), (f)); \
+	*(hp) = (unsigned long)__dma_alloc_coherent_p; \
+	__dma_alloc_coherent_p; \
+})
+
+#define dma_free_coherent(d, s, p, h) kfree(p)
+
+#define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o))
+
+#define dma_map_single(d, p, s, dir) (virt_to_phys(p))
+#define dma_mapping_error(...) (0)
+
+#define dma_unmap_single(...) do { } while (0)
+#define dma_unmap_page(...) do { } while (0)
+
+#endif
diff --git a/tools/virtio/linux/err.h b/tools/virtio/linux/err.h
new file mode 100644
index 000000000..0943c644a
--- /dev/null
+++ b/tools/virtio/linux/err.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ERR_H
+#define ERR_H
+#define MAX_ERRNO	4095
+
+#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void * __must_check ERR_PTR(long error)
+{
+	return (void *) error;
+}
+
+static inline long __must_check PTR_ERR(const void *ptr)
+{
+	return (long) ptr;
+}
+
+static inline long __must_check IS_ERR(const void *ptr)
+{
+	return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline long __must_check IS_ERR_OR_NULL(const void *ptr)
+{
+	return !ptr || IS_ERR_VALUE((unsigned long)ptr);
+}
+#endif /* ERR_H */
diff --git a/tools/virtio/linux/export.h b/tools/virtio/linux/export.h
new file mode 100644
index 000000000..416875e29
--- /dev/null
+++ b/tools/virtio/linux/export.h
@@ -0,0 +1,3 @@
+#define EXPORT_SYMBOL_GPL(sym) extern typeof(sym) sym
+#define EXPORT_SYMBOL(sym) extern typeof(sym) sym
+
diff --git a/tools/virtio/linux/hrtimer.h b/tools/virtio/linux/hrtimer.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/virtio/linux/hrtimer.h
diff --git a/tools/virtio/linux/irqreturn.h b/tools/virtio/linux/irqreturn.h
new file mode 100644
index 000000000..a3c4e7be7
--- /dev/null
+++ b/tools/virtio/linux/irqreturn.h
@@ -0,0 +1 @@
+#include "../../../include/linux/irqreturn.h"
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
new file mode 100644
index 000000000..7ef45a4a3
--- /dev/null
+++ b/tools/virtio/linux/kernel.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef KERNEL_H
+#define KERNEL_H
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <stdarg.h>
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/printk.h>
+#include <linux/bug.h>
+#include <errno.h>
+#include <unistd.h>
+#include <asm/barrier.h>
+
+#define CONFIG_SMP
+
+#define PAGE_SIZE getpagesize()
+#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK)
+
+/* generic data direction definitions */
+#define READ                    0
+#define WRITE                   1
+
+typedef unsigned long long phys_addr_t;
+typedef unsigned long long dma_addr_t;
+typedef size_t __kernel_size_t;
+typedef unsigned int __wsum;
+
+struct page {
+	unsigned long long dummy;
+};
+
+/* Physical == Virtual */
+#define virt_to_phys(p) ((unsigned long)p)
+#define phys_to_virt(a) ((void *)(unsigned long)(a))
+/* Page address: Virtual / 4K */
+#define page_to_phys(p) ((dma_addr_t)(unsigned long)(p))
+#define virt_to_page(p) ((struct page *)((unsigned long)p & PAGE_MASK))
+
+#define offset_in_page(p) (((unsigned long)p) % PAGE_SIZE)
+
+#define __printf(a,b) __attribute__((format(printf,a,b)))
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+
+extern void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+static inline void *kmalloc(size_t s, gfp_t gfp)
+{
+	if (__kmalloc_fake)
+		return __kmalloc_fake;
+	return malloc(s);
+}
+static inline void *kmalloc_array(unsigned n, size_t s, gfp_t gfp)
+{
+	return kmalloc(n * s, gfp);
+}
+
+static inline void *kzalloc(size_t s, gfp_t gfp)
+{
+	void *p = kmalloc(s, gfp);
+
+	memset(p, 0, s);
+	return p;
+}
+
+static inline void *alloc_pages_exact(size_t s, gfp_t gfp)
+{
+	return kmalloc(s, gfp);
+}
+
+static inline void kfree(void *p)
+{
+	if (p >= __kfree_ignore_start && p < __kfree_ignore_end)
+		return;
+	free(p);
+}
+
+static inline void free_pages_exact(void *p, size_t s)
+{
+	kfree(p);
+}
+
+static inline void *krealloc(void *p, size_t s, gfp_t gfp)
+{
+	return realloc(p, s);
+}
+
+
+static inline unsigned long __get_free_page(gfp_t gfp)
+{
+	void *p;
+
+	posix_memalign(&p, PAGE_SIZE, PAGE_SIZE);
+	return (unsigned long)p;
+}
+
+static inline void free_page(unsigned long addr)
+{
+	free((void *)addr);
+}
+
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+#define uninitialized_var(x) x = x
+
+# ifndef likely
+#  define likely(x)	(__builtin_expect(!!(x), 1))
+# endif
+# ifndef unlikely
+#  define unlikely(x)	(__builtin_expect(!!(x), 0))
+# endif
+
+#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+
+#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0)
+
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
+
+/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
+#define list_add_tail(a, b) do {} while (0)
+#define list_del(a) do {} while (0)
+#define list_for_each_entry(a, b, c) while (0)
+/* end of stubs */
+
+#endif /* KERNEL_H */
diff --git a/tools/virtio/linux/kmemleak.h b/tools/virtio/linux/kmemleak.h
new file mode 100644
index 000000000..c07072270
--- /dev/null
+++ b/tools/virtio/linux/kmemleak.h
@@ -0,0 +1,3 @@
+static inline void kmemleak_ignore(const void *ptr)
+{
+}
diff --git a/tools/virtio/linux/module.h b/tools/virtio/linux/module.h
new file mode 100644
index 000000000..9dfa96fea
--- /dev/null
+++ b/tools/virtio/linux/module.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
+
+#define MODULE_LICENSE(__MODULE_LICENSE_value) \
+	static __attribute__((unused)) const char *__MODULE_LICENSE_name = \
+		__MODULE_LICENSE_value
+
diff --git a/tools/virtio/linux/printk.h b/tools/virtio/linux/printk.h
new file mode 100644
index 000000000..9f2423bd8
--- /dev/null
+++ b/tools/virtio/linux/printk.h
@@ -0,0 +1,4 @@
+#include "../../../include/linux/kern_levels.h"
+
+#define printk printf
+#define vprintk vprintf
diff --git a/tools/virtio/linux/ratelimit.h b/tools/virtio/linux/ratelimit.h
new file mode 100644
index 000000000..dcce1725f
--- /dev/null
+++ b/tools/virtio/linux/ratelimit.h
@@ -0,0 +1,4 @@
+#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init)	int name = 0
+
+#define __ratelimit(x) (*(x))
+
diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h
new file mode 100644
index 000000000..369ee308b
--- /dev/null
+++ b/tools/virtio/linux/scatterlist.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef SCATTERLIST_H
+#define SCATTERLIST_H
+#include <linux/kernel.h>
+
+struct scatterlist {
+	unsigned long	page_link;
+	unsigned int	offset;
+	unsigned int	length;
+	dma_addr_t	dma_address;
+};
+
+/* Scatterlist helpers, stolen from linux/scatterlist.h */
+#define sg_is_chain(sg)		((sg)->page_link & 0x01)
+#define sg_is_last(sg)		((sg)->page_link & 0x02)
+#define sg_chain_ptr(sg)	\
+	((struct scatterlist *) ((sg)->page_link & ~0x03))
+
+/**
+ * sg_assign_page - Assign a given page to an SG entry
+ * @sg:		    SG entry
+ * @page:	    The page
+ *
+ * Description:
+ *   Assign page to sg entry. Also see sg_set_page(), the most commonly used
+ *   variant.
+ *
+ **/
+static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
+{
+	unsigned long page_link = sg->page_link & 0x3;
+
+	/*
+	 * In order for the low bit stealing approach to work, pages
+	 * must be aligned at a 32-bit boundary as a minimum.
+	 */
+	BUG_ON((unsigned long) page & 0x03);
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(sg_is_chain(sg));
+#endif
+	sg->page_link = page_link | (unsigned long) page;
+}
+
+/**
+ * sg_set_page - Set sg entry to point at given page
+ * @sg:		 SG entry
+ * @page:	 The page
+ * @len:	 Length of data
+ * @offset:	 Offset into page
+ *
+ * Description:
+ *   Use this function to set an sg entry pointing at a page, never assign
+ *   the page directly. We encode sg table information in the lower bits
+ *   of the page pointer. See sg_page() for looking up the page belonging
+ *   to an sg entry.
+ *
+ **/
+static inline void sg_set_page(struct scatterlist *sg, struct page *page,
+			       unsigned int len, unsigned int offset)
+{
+	sg_assign_page(sg, page);
+	sg->offset = offset;
+	sg->length = len;
+}
+
+static inline struct page *sg_page(struct scatterlist *sg)
+{
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(sg_is_chain(sg));
+#endif
+	return (struct page *)((sg)->page_link & ~0x3);
+}
+
+/*
+ * Loop over each sg element, following the pointer to a new list if necessary
+ */
+#define for_each_sg(sglist, sg, nr, __i)	\
+	for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
+
+/**
+ * sg_chain - Chain two sglists together
+ * @prv:	First scatterlist
+ * @prv_nents:	Number of entries in prv
+ * @sgl:	Second scatterlist
+ *
+ * Description:
+ *   Links @prv@ and @sgl@ together, to form a longer scatterlist.
+ *
+ **/
+static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
+			    struct scatterlist *sgl)
+{
+	/*
+	 * offset and length are unused for chain entry.  Clear them.
+	 */
+	prv[prv_nents - 1].offset = 0;
+	prv[prv_nents - 1].length = 0;
+
+	/*
+	 * Set lowest bit to indicate a link pointer, and make sure to clear
+	 * the termination bit if it happens to be set.
+	 */
+	prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
+}
+
+/**
+ * sg_mark_end - Mark the end of the scatterlist
+ * @sg:		 SG entryScatterlist
+ *
+ * Description:
+ *   Marks the passed in sg entry as the termination point for the sg
+ *   table. A call to sg_next() on this entry will return NULL.
+ *
+ **/
+static inline void sg_mark_end(struct scatterlist *sg)
+{
+	/*
+	 * Set termination bit, clear potential chain bit
+	 */
+	sg->page_link |= 0x02;
+	sg->page_link &= ~0x01;
+}
+
+/**
+ * sg_unmark_end - Undo setting the end of the scatterlist
+ * @sg:		 SG entryScatterlist
+ *
+ * Description:
+ *   Removes the termination marker from the given entry of the scatterlist.
+ *
+ **/
+static inline void sg_unmark_end(struct scatterlist *sg)
+{
+	sg->page_link &= ~0x02;
+}
+
+static inline struct scatterlist *sg_next(struct scatterlist *sg)
+{
+	if (sg_is_last(sg))
+		return NULL;
+
+	sg++;
+	if (unlikely(sg_is_chain(sg)))
+		sg = sg_chain_ptr(sg);
+
+	return sg;
+}
+
+static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
+{
+	memset(sgl, 0, sizeof(*sgl) * nents);
+	sg_mark_end(&sgl[nents - 1]);
+}
+
+static inline dma_addr_t sg_phys(struct scatterlist *sg)
+{
+	return page_to_phys(sg_page(sg)) + sg->offset;
+}
+
+static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
+			      unsigned int buflen)
+{
+	sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+}
+
+static inline void sg_init_one(struct scatterlist *sg,
+			       const void *buf, unsigned int buflen)
+{
+	sg_init_table(sg, 1);
+	sg_set_buf(sg, buf, buflen);
+}
+#endif /* SCATTERLIST_H */
diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h
new file mode 100644
index 000000000..319dcaa07
--- /dev/null
+++ b/tools/virtio/linux/slab.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_SLAB_H
+#define GFP_KERNEL 0
+#define GFP_ATOMIC 0
+#define __GFP_NOWARN 0
+#define __GFP_ZERO 0
+#endif
diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h
new file mode 100644
index 000000000..e0f610d08
--- /dev/null
+++ b/tools/virtio/linux/thread_info.h
@@ -0,0 +1 @@
+#define check_copy_size(A, B, C) (1)
diff --git a/tools/virtio/linux/uaccess.h b/tools/virtio/linux/uaccess.h
new file mode 100644
index 000000000..991dfb263
--- /dev/null
+++ b/tools/virtio/linux/uaccess.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef UACCESS_H
+#define UACCESS_H
+
+#include <linux/compiler.h>
+
+extern void *__user_addr_min, *__user_addr_max;
+
+static inline void __chk_user_ptr(const volatile void *p, size_t size)
+{
+	assert(p >= __user_addr_min && p + size <= __user_addr_max);
+}
+
+#define put_user(x, ptr)					\
+({								\
+	typeof(ptr) __pu_ptr = (ptr);				\
+	__chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr));		\
+	WRITE_ONCE(*(__pu_ptr), x);				\
+	0;							\
+})
+
+#define get_user(x, ptr)					\
+({								\
+	typeof(ptr) __pu_ptr = (ptr);				\
+	__chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr));		\
+	x = READ_ONCE(*(__pu_ptr));				\
+	0;							\
+})
+
+static void volatile_memcpy(volatile char *to, const volatile char *from, 
+			    unsigned long n)
+{
+	while (n--)
+		*(to++) = *(from++);
+}
+
+static inline int copy_from_user(void *to, const void __user volatile *from,
+				 unsigned long n)
+{
+	__chk_user_ptr(from, n);
+	volatile_memcpy(to, from, n);
+	return 0;
+}
+
+static inline int copy_to_user(void __user volatile *to, const void *from,
+			       unsigned long n)
+{
+	__chk_user_ptr(to, n);
+	volatile_memcpy(to, from, n);
+	return 0;
+}
+#endif /* UACCESS_H */
diff --git a/tools/virtio/linux/uio.h b/tools/virtio/linux/uio.h
new file mode 100644
index 000000000..cd20f0ba3
--- /dev/null
+++ b/tools/virtio/linux/uio.h
@@ -0,0 +1,3 @@
+#include <linux/kernel.h>
+
+#include "../../../include/linux/uio.h"
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
new file mode 100644
index 000000000..b751350d4
--- /dev/null
+++ b/tools/virtio/linux/virtio.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_VIRTIO_H
+#define LINUX_VIRTIO_H
+#include <linux/scatterlist.h>
+#include <linux/kernel.h>
+
+struct device {
+	void *parent;
+};
+
+struct virtio_device {
+	struct device dev;
+	u64 features;
+};
+
+struct virtqueue {
+	/* TODO: commented as list macros are empty stubs for now.
+	 * Broken but enough for virtio_ring.c
+	 * struct list_head list; */
+	void (*callback)(struct virtqueue *vq);
+	const char *name;
+	struct virtio_device *vdev;
+        unsigned int index;
+        unsigned int num_free;
+	void *priv;
+};
+
+/* Interfaces exported by virtio_ring. */
+int virtqueue_add_sgs(struct virtqueue *vq,
+		      struct scatterlist *sgs[],
+		      unsigned int out_sgs,
+		      unsigned int in_sgs,
+		      void *data,
+		      gfp_t gfp);
+
+int virtqueue_add_outbuf(struct virtqueue *vq,
+			 struct scatterlist sg[], unsigned int num,
+			 void *data,
+			 gfp_t gfp);
+
+int virtqueue_add_inbuf(struct virtqueue *vq,
+			struct scatterlist sg[], unsigned int num,
+			void *data,
+			gfp_t gfp);
+
+bool virtqueue_kick(struct virtqueue *vq);
+
+void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
+
+void virtqueue_disable_cb(struct virtqueue *vq);
+
+bool virtqueue_enable_cb(struct virtqueue *vq);
+bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
+
+void *virtqueue_detach_unused_buf(struct virtqueue *vq);
+struct virtqueue *vring_new_virtqueue(unsigned int index,
+				      unsigned int num,
+				      unsigned int vring_align,
+				      struct virtio_device *vdev,
+				      bool weak_barriers,
+				      bool ctx,
+				      void *pages,
+				      bool (*notify)(struct virtqueue *vq),
+				      void (*callback)(struct virtqueue *vq),
+				      const char *name);
+void vring_del_virtqueue(struct virtqueue *vq);
+
+#endif
diff --git a/tools/virtio/linux/virtio_byteorder.h b/tools/virtio/linux/virtio_byteorder.h
new file mode 100644
index 000000000..5b50f7eeb
--- /dev/null
+++ b/tools/virtio/linux/virtio_byteorder.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VIRTIO_BYTEORDER_STUB_H
+#define _LINUX_VIRTIO_BYTEORDER_STUB_H
+
+#include <asm/byteorder.h>
+#include "../../include/linux/byteorder/generic.h"
+#include "../../include/linux/virtio_byteorder.h"
+
+#endif
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
new file mode 100644
index 000000000..dbf14c1e2
--- /dev/null
+++ b/tools/virtio/linux/virtio_config.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/virtio_byteorder.h>
+#include <linux/virtio.h>
+#include <uapi/linux/virtio_config.h>
+
+/*
+ * __virtio_test_bit - helper to test feature bits. For use by transports.
+ *                     Devices should normally use virtio_has_feature,
+ *                     which includes more checks.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline bool __virtio_test_bit(const struct virtio_device *vdev,
+				     unsigned int fbit)
+{
+	return vdev->features & (1ULL << fbit);
+}
+
+/**
+ * __virtio_set_bit - helper to set feature bits. For use by transports.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline void __virtio_set_bit(struct virtio_device *vdev,
+				    unsigned int fbit)
+{
+	vdev->features |= (1ULL << fbit);
+}
+
+/**
+ * __virtio_clear_bit - helper to clear feature bits. For use by transports.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline void __virtio_clear_bit(struct virtio_device *vdev,
+				      unsigned int fbit)
+{
+	vdev->features &= ~(1ULL << fbit);
+}
+
+#define virtio_has_feature(dev, feature) \
+	(__virtio_test_bit((dev), feature))
+
+/**
+ * virtio_has_iommu_quirk - determine whether this device has the iommu quirk
+ * @vdev: the device
+ */
+static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
+{
+	/*
+	 * Note the reverse polarity of the quirk feature (compared to most
+	 * other features), this is for compatibility with legacy systems.
+	 */
+	return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+}
+
+static inline bool virtio_is_little_endian(struct virtio_device *vdev)
+{
+	return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
+		virtio_legacy_is_little_endian();
+}
+
+/* Memory accessors */
+static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val)
+{
+	return __virtio16_to_cpu(virtio_is_little_endian(vdev), val);
+}
+
+static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val)
+{
+	return __cpu_to_virtio16(virtio_is_little_endian(vdev), val);
+}
+
+static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val)
+{
+	return __virtio32_to_cpu(virtio_is_little_endian(vdev), val);
+}
+
+static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val)
+{
+	return __cpu_to_virtio32(virtio_is_little_endian(vdev), val);
+}
+
+static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val)
+{
+	return __virtio64_to_cpu(virtio_is_little_endian(vdev), val);
+}
+
+static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
+{
+	return __cpu_to_virtio64(virtio_is_little_endian(vdev), val);
+}
diff --git a/tools/virtio/linux/virtio_ring.h b/tools/virtio/linux/virtio_ring.h
new file mode 100644
index 000000000..8949c4e27
--- /dev/null
+++ b/tools/virtio/linux/virtio_ring.h
@@ -0,0 +1 @@
+#include "../../../include/linux/virtio_ring.h"
diff --git a/tools/virtio/linux/vringh.h b/tools/virtio/linux/vringh.h
new file mode 100644
index 000000000..9348957be
--- /dev/null
+++ b/tools/virtio/linux/vringh.h
@@ -0,0 +1 @@
+#include "../../../include/linux/vringh.h"
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644
index 000000000..85c98c281
--- /dev/null
+++ b/tools/virtio/ringtest/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring noring
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb -flto -fwhole-program
+LDFLAGS += -pthread -O2 -ggdb -flto -fwhole-program
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+ptr_ring.o: ptr_ring.c main.h ../../../include/linux/ptr_ring.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+virtio_ring_inorder.o: virtio_ring_inorder.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+virtio_ring_inorder: virtio_ring_inorder.o main.o
+ptr_ring: ptr_ring.o main.o
+noring: noring.o main.o
+clean:
+	-rm main.o
+	-rm ring.o ring
+	-rm virtio_ring_0_9.o virtio_ring_0_9
+	-rm virtio_ring_poll.o virtio_ring_poll
+	-rm virtio_ring_inorder.o virtio_ring_inorder
+	-rm ptr_ring.o ptr_ring
+	-rm noring.o noring
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644
index 000000000..d83707a33
--- /dev/null
+++ b/tools/virtio/ringtest/README
@@ -0,0 +1,6 @@
+Partial implementation of various ring layouts, useful to tune virtio design.
+Uses shared memory heavily.
+
+Typical use:
+
+# sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644
index 000000000..453ca3c21
--- /dev/null
+++ b/tools/virtio/ringtest/main.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+int param = 0;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
+void notify(int fd)
+{
+	unsigned long long v = 1;
+	int r;
+
+	vmexit();
+	r = write(fd, &v, sizeof v);
+	assert(r == sizeof v);
+	vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+	unsigned long long v = 1;
+	int r;
+
+	vmexit();
+	r = read(fd, &v, sizeof v);
+	assert(r == sizeof v);
+	vmentry();
+}
+
+void kick(void)
+{
+	notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+	wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+	notify(callfd);
+}
+
+void wait_for_call(void)
+{
+	wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+	cpu_set_t cpuset;
+	int ret;
+	pthread_t self;
+	long int cpu;
+	char *endptr;
+
+	if (!arg)
+		return;
+
+	cpu = strtol(arg, &endptr, 0);
+	assert(!*endptr);
+
+	assert(cpu >= 0 && cpu < CPU_SETSIZE);
+
+	self = pthread_self();
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+
+	ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+	assert(!ret);
+}
+
+void poll_used(void)
+{
+	while (used_empty())
+		busy_wait();
+}
+
+static void __attribute__((__flatten__)) run_guest(void)
+{
+	int completed_before;
+	int completed = 0;
+	int started = 0;
+	int bufs = runcycles;
+	int spurious = 0;
+	int r;
+	unsigned len;
+	void *buf;
+	int tokick = batch;
+
+	for (;;) {
+		if (do_sleep)
+			disable_call();
+		completed_before = completed;
+		do {
+			if (started < bufs &&
+			    started - completed < max_outstanding) {
+				r = add_inbuf(0, "Buffer\n", "Hello, world!");
+				if (__builtin_expect(r == 0, true)) {
+					++started;
+					if (!--tokick) {
+						tokick = batch;
+						if (do_sleep)
+							kick_available();
+					}
+
+				}
+			} else
+				r = -1;
+
+			/* Flush out completed bufs if any */
+			if (get_buf(&len, &buf)) {
+				++completed;
+				if (__builtin_expect(completed == bufs, false))
+					return;
+				r = 0;
+			}
+		} while (r == 0);
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		assert(started <= bufs);
+		if (do_sleep) {
+			if (used_empty() && enable_call())
+				wait_for_call();
+		} else {
+			poll_used();
+		}
+	}
+}
+
+void poll_avail(void)
+{
+	while (avail_empty())
+		busy_wait();
+}
+
+static void __attribute__((__flatten__)) run_host(void)
+{
+	int completed_before;
+	int completed = 0;
+	int spurious = 0;
+	int bufs = runcycles;
+	unsigned len;
+	void *buf;
+
+	for (;;) {
+		if (do_sleep) {
+			if (avail_empty() && enable_kick())
+				wait_for_kick();
+		} else {
+			poll_avail();
+		}
+		if (do_sleep)
+			disable_kick();
+		completed_before = completed;
+		while (__builtin_expect(use_buf(&len, &buf), true)) {
+			if (do_sleep)
+				call_used();
+			++completed;
+			if (__builtin_expect(completed == bufs, false))
+				return;
+		}
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		if (completed == bufs)
+			break;
+	}
+}
+
+void *start_guest(void *arg)
+{
+	set_affinity(arg);
+	run_guest();
+	pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+	set_affinity(arg);
+	run_host();
+	pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+	{
+		.name = "help",
+		.has_arg = no_argument,
+		.val = 'h',
+	},
+	{
+		.name = "host-affinity",
+		.has_arg = required_argument,
+		.val = 'H',
+	},
+	{
+		.name = "guest-affinity",
+		.has_arg = required_argument,
+		.val = 'G',
+	},
+	{
+		.name = "ring-size",
+		.has_arg = required_argument,
+		.val = 'R',
+	},
+	{
+		.name = "run-cycles",
+		.has_arg = required_argument,
+		.val = 'C',
+	},
+	{
+		.name = "outstanding",
+		.has_arg = required_argument,
+		.val = 'o',
+	},
+	{
+		.name = "batch",
+		.has_arg = required_argument,
+		.val = 'b',
+	},
+	{
+		.name = "param",
+		.has_arg = required_argument,
+		.val = 'p',
+	},
+	{
+		.name = "sleep",
+		.has_arg = no_argument,
+		.val = 's',
+	},
+	{
+		.name = "relax",
+		.has_arg = no_argument,
+		.val = 'x',
+	},
+	{
+		.name = "exit",
+		.has_arg = no_argument,
+		.val = 'e',
+	},
+	{
+	}
+};
+
+static void help(void)
+{
+	fprintf(stderr, "Usage: <test> [--help]"
+		" [--host-affinity H]"
+		" [--guest-affinity G]"
+		" [--ring-size R (default: %d)]"
+		" [--run-cycles C (default: %d)]"
+		" [--batch b]"
+		" [--outstanding o]"
+		" [--param p]"
+		" [--sleep]"
+		" [--relax]"
+		" [--exit]"
+		"\n",
+		ring_size,
+		runcycles);
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	pthread_t host, guest;
+	void *tret;
+	char *host_arg = NULL;
+	char *guest_arg = NULL;
+	char *endptr;
+	long int c;
+
+	kickfd = eventfd(0, 0);
+	assert(kickfd >= 0);
+	callfd = eventfd(0, 0);
+	assert(callfd >= 0);
+
+	for (;;) {
+		int o = getopt_long(argc, argv, optstring, longopts, NULL);
+		switch (o) {
+		case -1:
+			goto done;
+		case '?':
+			help();
+			exit(2);
+		case 'H':
+			host_arg = optarg;
+			break;
+		case 'G':
+			guest_arg = optarg;
+			break;
+		case 'R':
+			ring_size = strtol(optarg, &endptr, 0);
+			assert(ring_size && !(ring_size & (ring_size - 1)));
+			assert(!*endptr);
+			break;
+		case 'C':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			runcycles = c;
+			break;
+		case 'o':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			max_outstanding = c;
+			break;
+		case 'p':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			param = c;
+			break;
+		case 'b':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			batch = c;
+			break;
+		case 's':
+			do_sleep = true;
+			break;
+		case 'x':
+			do_relax = true;
+			break;
+		case 'e':
+			do_exit = true;
+			break;
+		default:
+			help();
+			exit(4);
+			break;
+		}
+	}
+
+	/* does nothing here, used to make sure all smp APIs compile */
+	smp_acquire();
+	smp_release();
+	smp_mb();
+done:
+
+	if (batch > max_outstanding)
+		batch = max_outstanding;
+
+	if (optind < argc) {
+		help();
+		exit(4);
+	}
+	alloc_ring();
+
+	ret = pthread_create(&host, NULL, start_host, host_arg);
+	assert(!ret);
+	ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+	assert(!ret);
+
+	ret = pthread_join(guest, &tret);
+	assert(!ret);
+	ret = pthread_join(host, &tret);
+	assert(!ret);
+	return 0;
+}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644
index 000000000..301d59bfc
--- /dev/null
+++ b/tools/virtio/ringtest/main.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern int param;
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+	unsigned long long t;
+
+	t = __rdtsc();
+	while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#elif defined(__s390x__)
+static inline void wait_cycles(unsigned long long cycles)
+{
+	asm volatile("0: brctg %0,0b" : : "d" (cycles));
+}
+
+/* tweak me */
+#define VMEXIT_CYCLES 200
+#define VMENTRY_CYCLES 200
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+	_Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+	if (!do_exit)
+		return;
+	
+	wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+	if (!do_exit)
+		return;
+	
+	wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool used_empty();
+bool enable_call();
+void kick_available();
+/* host side */
+void disable_kick();
+bool avail_empty();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#elif defined(__s390x__)
+#define cpu_relax() barrier()
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+	if (do_relax)
+		cpu_relax();
+	else
+		/* prevent compiler from removing busy loops */
+		barrier();
+} 
+
+#if defined(__x86_64__) || defined(__i386__)
+#define smp_mb()     asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#else
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+#endif
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+    barrier(); \
+    __atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+    __atomic_thread_fence(__ATOMIC_ACQUIRE); \
+    barrier(); \
+} while (0)
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
+#define smp_wmb() barrier()
+#else
+#define smp_wmb() smp_release()
+#endif
+
+#ifdef __alpha__
+#define smp_read_barrier_depends() smp_acquire()
+#else
+#define smp_read_barrier_depends() do {} while(0)
+#endif
+
+static __always_inline
+void __read_once_size(const volatile void *p, void *res, int size)
+{
+        switch (size) {                                                 \
+        case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break;              \
+        case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break;            \
+        case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break;            \
+        case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break;            \
+        default:                                                        \
+                barrier();                                              \
+                __builtin_memcpy((void *)res, (const void *)p, size);   \
+                barrier();                                              \
+        }                                                               \
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+	switch (size) {
+	case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
+	case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
+	case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
+	case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
+	default:
+		barrier();
+		__builtin_memcpy((void *)p, (const void *)res, size);
+		barrier();
+	}
+}
+
+#define READ_ONCE(x) \
+({									\
+	union { typeof(x) __val; char __c[1]; } __u;			\
+	__read_once_size(&(x), __u.__c, sizeof(x));		\
+	smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
+	__u.__val;							\
+})
+
+#define WRITE_ONCE(x, val) \
+({							\
+	union { typeof(x) __val; char __c[1]; } __u =	\
+		{ .__val = (typeof(x)) (val) }; \
+	__write_once_size(&(x), __u.__c, sizeof(x));	\
+	__u.__val;					\
+})
+
+#endif
diff --git a/tools/virtio/ringtest/noring.c b/tools/virtio/ringtest/noring.c
new file mode 100644
index 000000000..ce2440d5c
--- /dev/null
+++ b/tools/virtio/ringtest/noring.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include "main.h"
+#include <assert.h>
+
+/* stub implementation: useful for measuring overhead */
+void alloc_ring(void)
+{
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	return 0;
+}
+
+/*
+ * skb_array API provides no way for producer to find out whether a given
+ * buffer was consumed.  Our tests merely require that a successful get_buf
+ * implies that add_inbuf succeed in the past, and that add_inbuf will succeed,
+ * fake it accordingly.
+ */
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	return "Buffer";
+}
+
+bool used_empty()
+{
+	return false;
+}
+
+void disable_call()
+{
+	assert(0);
+}
+
+bool enable_call()
+{
+	assert(0);
+}
+
+void kick_available(void)
+{
+	assert(0);
+}
+
+/* host side */
+void disable_kick()
+{
+	assert(0);
+}
+
+bool enable_kick()
+{
+	assert(0);
+}
+
+bool avail_empty()
+{
+	return false;
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	return true;
+}
+
+void call_used(void)
+{
+	assert(0);
+}
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
new file mode 100644
index 000000000..2d566fbd2
--- /dev/null
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <malloc.h>
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+
+#define SMP_CACHE_BYTES 64
+#define cache_line_size() SMP_CACHE_BYTES
+#define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES)))
+#define unlikely(x)    (__builtin_expect(!!(x), 0))
+#define likely(x)    (__builtin_expect(!!(x), 1))
+#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
+#define SIZE_MAX        (~(size_t)0)
+#define KMALLOC_MAX_SIZE SIZE_MAX
+#define BUG_ON(x) assert(x)
+
+typedef pthread_spinlock_t  spinlock_t;
+
+typedef int gfp_t;
+#define __GFP_ZERO 0x1
+
+static void *kmalloc(unsigned size, gfp_t gfp)
+{
+	void *p = memalign(64, size);
+	if (!p)
+		return p;
+
+	if (gfp & __GFP_ZERO)
+		memset(p, 0, size);
+	return p;
+}
+
+static inline void *kzalloc(unsigned size, gfp_t flags)
+{
+	return kmalloc(size, flags | __GFP_ZERO);
+}
+
+static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
+{
+	if (size != 0 && n > SIZE_MAX / size)
+		return NULL;
+	return kmalloc(n * size, flags);
+}
+
+static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+{
+	return kmalloc_array(n, size, flags | __GFP_ZERO);
+}
+
+static void kfree(void *p)
+{
+	if (p)
+		free(p);
+}
+
+#define kvmalloc_array kmalloc_array
+#define kvfree kfree
+
+static void spin_lock_init(spinlock_t *lock)
+{
+	int r = pthread_spin_init(lock, 0);
+	assert(!r);
+}
+
+static void spin_lock(spinlock_t *lock)
+{
+	int ret = pthread_spin_lock(lock);
+	assert(!ret);
+}
+
+static void spin_unlock(spinlock_t *lock)
+{
+	int ret = pthread_spin_unlock(lock);
+	assert(!ret);
+}
+
+static void spin_lock_bh(spinlock_t *lock)
+{
+	spin_lock(lock);
+}
+
+static void spin_unlock_bh(spinlock_t *lock)
+{
+	spin_unlock(lock);
+}
+
+static void spin_lock_irq(spinlock_t *lock)
+{
+	spin_lock(lock);
+}
+
+static void spin_unlock_irq(spinlock_t *lock)
+{
+	spin_unlock(lock);
+}
+
+static void spin_lock_irqsave(spinlock_t *lock, unsigned long f)
+{
+	spin_lock(lock);
+}
+
+static void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f)
+{
+	spin_unlock(lock);
+}
+
+#include "../../../include/linux/ptr_ring.h"
+
+static unsigned long long headcnt, tailcnt;
+static struct ptr_ring array ____cacheline_aligned_in_smp;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+	int ret = ptr_ring_init(&array, ring_size, 0);
+	assert(!ret);
+	/* Hacky way to poke at ring internals. Useful for testing though. */
+	if (param)
+		array.batch = param;
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	int ret;
+
+	ret = __ptr_ring_produce(&array, buf);
+	if (ret >= 0) {
+		ret = 0;
+		headcnt++;
+	}
+
+	return ret;
+}
+
+/*
+ * ptr_ring API provides no way for producer to find out whether a given
+ * buffer was consumed.  Our tests merely require that a successful get_buf
+ * implies that add_inbuf succeed in the past, and that add_inbuf will succeed,
+ * fake it accordingly.
+ */
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	void *datap;
+
+	if (tailcnt == headcnt || __ptr_ring_full(&array))
+		datap = NULL;
+	else {
+		datap = "Buffer\n";
+		++tailcnt;
+	}
+
+	return datap;
+}
+
+bool used_empty()
+{
+	return (tailcnt == headcnt || __ptr_ring_full(&array));
+}
+
+void disable_call()
+{
+	assert(0);
+}
+
+bool enable_call()
+{
+	assert(0);
+}
+
+void kick_available(void)
+{
+	assert(0);
+}
+
+/* host side */
+void disable_kick()
+{
+	assert(0);
+}
+
+bool enable_kick()
+{
+	assert(0);
+}
+
+bool avail_empty()
+{
+	return __ptr_ring_empty(&array);
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	void *ptr;
+
+	ptr = __ptr_ring_consume(&array);
+
+	return ptr;
+}
+
+void call_used(void)
+{
+	assert(0);
+}
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644
index 000000000..5a41404aa
--- /dev/null
+++ b/tools/virtio/ringtest/ring.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Next - Where next entry will be written.
+ * Prev - "Next" value when event triggered previously.
+ * Event - Peer requested event after writing this entry.
+ */
+static inline bool need_event(unsigned short event,
+			      unsigned short next,
+			      unsigned short prev)
+{
+	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
+}
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+	unsigned short flags;
+	unsigned short index;
+	unsigned len;
+	unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+	unsigned short kick_index;
+	unsigned char reserved0[HOST_GUEST_PADDING - 2];
+	unsigned short call_index;
+	unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+	void *buf; /* descriptor is writeable, we can't get buf from there */
+	void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+struct guest {
+	unsigned avail_idx;
+	unsigned last_used_idx;
+	unsigned num_free;
+	unsigned kicked_avail_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 12];
+} guest;
+
+struct host {
+	/* we do not need to track last avail index
+	 * unless we have more than one in flight.
+	 */
+	unsigned used_idx;
+	unsigned called_used_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+	int ret;
+	int i;
+
+	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
+	if (ret) {
+		perror("Unable to allocate ring buffer.\n");
+		exit(3);
+	}
+	event = calloc(1, sizeof(*event));
+	if (!event) {
+		perror("Unable to allocate event buffer.\n");
+		exit(3);
+	}
+	guest.avail_idx = 0;
+	guest.kicked_avail_idx = -1;
+	guest.last_used_idx = 0;
+	host.used_idx = 0;
+	host.called_used_idx = -1;
+	for (i = 0; i < ring_size; ++i) {
+		struct desc desc = {
+			.index = i,
+		};
+		ring[i] = desc;
+	}
+	guest.num_free = ring_size;
+	data = calloc(ring_size, sizeof(*data));
+	if (!data) {
+		perror("Unable to allocate data buffer.\n");
+		exit(3);
+	}
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	unsigned head, index;
+
+	if (!guest.num_free)
+		return -1;
+
+	guest.num_free--;
+	head = (ring_size - 1) & (guest.avail_idx++);
+
+	/* Start with a write. On MESI architectures this helps
+	 * avoid a shared state with consumer that is polling this descriptor.
+	 */
+	ring[head].addr = (unsigned long)(void*)buf;
+	ring[head].len = len;
+	/* read below might bypass write above. That is OK because it's just an
+	 * optimization. If this happens, we will get the cache line in a
+	 * shared state which is unfortunate, but probably not worth it to
+	 * add an explicit full barrier to avoid this.
+	 */
+	barrier();
+	index = ring[head].index;
+	data[index].buf = buf;
+	data[index].data = datap;
+	/* Barrier A (for pairing) */
+	smp_release();
+	ring[head].flags = DESC_HW;
+
+	return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+	unsigned index;
+	void *datap;
+
+	if (ring[head].flags & DESC_HW)
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	*lenp = ring[head].len;
+	index = ring[head].index & (ring_size - 1);
+	datap = data[index].data;
+	*bufp = data[index].buf;
+	data[index].buf = NULL;
+	data[index].data = NULL;
+	guest.num_free++;
+	guest.last_used_idx++;
+	return datap;
+}
+
+bool used_empty()
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	return (ring[head].flags & DESC_HW);
+}
+
+void disable_call()
+{
+	/* Doing nothing to disable calls might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_call()
+{
+	event->call_index = guest.last_used_idx;
+	/* Flush call index write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	return used_empty();
+}
+
+void kick_available(void)
+{
+	bool need;
+
+	/* Flush in previous flags write */
+	/* Barrier C (for pairing) */
+	smp_mb();
+	need = need_event(event->kick_index,
+			   guest.avail_idx,
+			   guest.kicked_avail_idx);
+
+	guest.kicked_avail_idx = guest.avail_idx;
+	if (need)
+		kick();
+}
+
+/* host side */
+void disable_kick()
+{
+	/* Doing nothing to disable kicks might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_kick()
+{
+	event->kick_index = host.used_idx;
+	/* Barrier C (for pairing) */
+	smp_mb();
+	return avail_empty();
+}
+
+bool avail_empty()
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	return !(ring[head].flags & DESC_HW);
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	if (!(ring[head].flags & DESC_HW))
+		return false;
+
+	/* make sure length read below is not speculated */
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	/* simple in-order completion: we don't need
+	 * to touch index at all. This also means we
+	 * can just modify the descriptor in-place.
+	 */
+	ring[head].len--;
+	/* Make sure len is valid before flags.
+	 * Note: alternative is to write len and flags in one access -
+	 * possible on 64 bit architectures but wmb is free on Intel anyway
+	 * so I have no way to test whether it's a gain.
+	 */
+	/* Barrier B (for pairing) */
+	smp_release();
+	ring[head].flags = 0;
+	host.used_idx++;
+	return true;
+}
+
+void call_used(void)
+{
+	bool need;
+
+	/* Flush in previous flags write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+
+	need = need_event(event->call_index,
+			host.used_idx,
+			host.called_used_idx);
+
+	host.called_used_idx = host.used_idx;
+
+	if (need)
+		call();
+}
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755
index 000000000..dcc3ea758
--- /dev/null
+++ b/tools/virtio/ringtest/run-on-all.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+CPUS_ONLINE=$(lscpu --online -p=cpu|grep -v -e '#')
+#use last CPU for host. Why not the first?
+#many devices tend to use cpu0 by default so
+#it tends to be busier
+HOST_AFFINITY=$(echo "${CPUS_ONLINE}"|tail -n 1)
+
+#run command on all cpus
+for cpu in $CPUS_ONLINE
+do
+	#Don't run guest and host on same CPU
+	#It actually works ok if using signalling
+	if
+		(echo "$@" | grep -e "--sleep" > /dev/null) || \
+			test $HOST_AFFINITY '!=' $cpu
+	then
+		echo "GUEST AFFINITY $cpu"
+		"$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
+	fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity $HOST_AFFINITY
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644
index 000000000..5fd3fbcb9
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. event index is used for signalling,
+ * unconditionally. Design roughly follows linux kernel implementation in order
+ * to be able to judge its performance.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+	void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #ifdef RING_POLL */
+/* enabling the below activates experimental in-order code
+ * (which skips ring updates and reads and writes len in descriptor).
+ */
+/* #ifdef INORDER */
+
+#if defined(RING_POLL) && defined(INORDER)
+#error "RING_POLL and INORDER are mutually exclusive"
+#endif
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+	unsigned short avail_idx;
+	unsigned short last_used_idx;
+	unsigned short num_free;
+	unsigned short kicked_avail_idx;
+#ifndef INORDER
+	unsigned short free_head;
+#else
+	unsigned short reserved_free_head;
+#endif
+	unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+	/* we do not need to track last avail index
+	 * unless we have more than one in flight.
+	 */
+	unsigned short used_idx;
+	unsigned short called_used_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+	int ret;
+	int i;
+	void *p;
+
+	ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
+	if (ret) {
+		perror("Unable to allocate ring buffer.\n");
+		exit(3);
+	}
+	memset(p, 0, vring_size(ring_size, 0x1000));
+	vring_init(&ring, ring_size, p, 0x1000);
+
+	guest.avail_idx = 0;
+	guest.kicked_avail_idx = -1;
+	guest.last_used_idx = 0;
+#ifndef INORDER
+	/* Put everything in free lists. */
+	guest.free_head = 0;
+#endif
+	for (i = 0; i < ring_size - 1; i++)
+		ring.desc[i].next = i + 1;
+	host.used_idx = 0;
+	host.called_used_idx = -1;
+	guest.num_free = ring_size;
+	data = malloc(ring_size * sizeof *data);
+	if (!data) {
+		perror("Unable to allocate data buffer.\n");
+		exit(3);
+	}
+	memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	unsigned head;
+#ifndef INORDER
+	unsigned avail;
+#endif
+	struct vring_desc *desc;
+
+	if (!guest.num_free)
+		return -1;
+
+#ifdef INORDER
+	head = (ring_size - 1) & (guest.avail_idx++);
+#else
+	head = guest.free_head;
+#endif
+	guest.num_free--;
+
+	desc = ring.desc;
+	desc[head].flags = VRING_DESC_F_NEXT;
+	desc[head].addr = (unsigned long)(void *)buf;
+	desc[head].len = len;
+	/* We do it like this to simulate the way
+	 * we'd have to flip it if we had multiple
+	 * descriptors.
+	 */
+	desc[head].flags &= ~VRING_DESC_F_NEXT;
+#ifndef INORDER
+	guest.free_head = desc[head].next;
+#endif
+
+	data[head].data = datap;
+
+#ifdef RING_POLL
+	/* Barrier A (for pairing) */
+	smp_release();
+	avail = guest.avail_idx++;
+	ring.avail->ring[avail & (ring_size - 1)] =
+		(head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+#ifndef INORDER
+	/* Barrier A (for pairing) */
+	smp_release();
+	avail = (ring_size - 1) & (guest.avail_idx++);
+	ring.avail->ring[avail] = head;
+#endif
+	/* Barrier A (for pairing) */
+	smp_release();
+#endif
+	ring.avail->idx = guest.avail_idx;
+	return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head;
+	unsigned index;
+	void *datap;
+
+#ifdef RING_POLL
+	head = (ring_size - 1) & guest.last_used_idx;
+	index = ring.used->ring[head].id;
+	if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	index &= ring_size - 1;
+#else
+	if (ring.used->idx == guest.last_used_idx)
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+#ifdef INORDER
+	head = (ring_size - 1) & guest.last_used_idx;
+	index = head;
+#else
+	head = (ring_size - 1) & guest.last_used_idx;
+	index = ring.used->ring[head].id;
+#endif
+
+#endif
+#ifdef INORDER
+	*lenp = ring.desc[index].len;
+#else
+	*lenp = ring.used->ring[head].len;
+#endif
+	datap = data[index].data;
+	*bufp = (void*)(unsigned long)ring.desc[index].addr;
+	data[index].data = NULL;
+#ifndef INORDER
+	ring.desc[index].next = guest.free_head;
+	guest.free_head = index;
+#endif
+	guest.num_free++;
+	guest.last_used_idx++;
+	return datap;
+}
+
+bool used_empty()
+{
+	unsigned short last_used_idx = guest.last_used_idx;
+#ifdef RING_POLL
+	unsigned short head = last_used_idx & (ring_size - 1);
+	unsigned index = ring.used->ring[head].id;
+
+	return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+#else
+	return ring.used->idx == last_used_idx;
+#endif
+}
+
+void disable_call()
+{
+	/* Doing nothing to disable calls might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_call()
+{
+	vring_used_event(&ring) = guest.last_used_idx;
+	/* Flush call index write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	return used_empty();
+}
+
+void kick_available(void)
+{
+	bool need;
+
+	/* Flush in previous flags write */
+	/* Barrier C (for pairing) */
+	smp_mb();
+	need = vring_need_event(vring_avail_event(&ring),
+				guest.avail_idx,
+				guest.kicked_avail_idx);
+
+	guest.kicked_avail_idx = guest.avail_idx;
+	if (need)
+		kick();
+}
+
+/* host side */
+void disable_kick()
+{
+	/* Doing nothing to disable kicks might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_kick()
+{
+	vring_avail_event(&ring) = host.used_idx;
+	/* Barrier C (for pairing) */
+	smp_mb();
+	return avail_empty();
+}
+
+bool avail_empty()
+{
+	unsigned head = host.used_idx;
+#ifdef RING_POLL
+	unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+	return ((index ^ head ^ 0x8000) & ~(ring_size - 1));
+#else
+	return head == ring.avail->idx;
+#endif
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	unsigned used_idx = host.used_idx;
+	struct vring_desc *desc;
+	unsigned head;
+
+#ifdef RING_POLL
+	head = ring.avail->ring[used_idx & (ring_size - 1)];
+	if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+		return false;
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	used_idx &= ring_size - 1;
+	desc = &ring.desc[head & (ring_size - 1)];
+#else
+	if (used_idx == ring.avail->idx)
+		return false;
+
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	used_idx &= ring_size - 1;
+#ifdef INORDER
+	head = used_idx;
+#else
+	head = ring.avail->ring[used_idx];
+#endif
+	desc = &ring.desc[head];
+#endif
+
+	*lenp = desc->len;
+	*bufp = (void *)(unsigned long)desc->addr;
+
+#ifdef INORDER
+	desc->len = desc->len - 1;
+#else
+	/* now update used ring */
+	ring.used->ring[used_idx].id = head;
+	ring.used->ring[used_idx].len = desc->len - 1;
+#endif
+	/* Barrier B (for pairing) */
+	smp_release();
+	host.used_idx++;
+	ring.used->idx = host.used_idx;
+	
+	return true;
+}
+
+void call_used(void)
+{
+	bool need;
+
+	/* Flush in previous flags write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	need = vring_need_event(vring_used_event(&ring),
+				host.used_idx,
+				host.called_used_idx);
+
+	host.called_used_idx = host.used_idx;
+	if (need)
+		call();
+}
diff --git a/tools/virtio/ringtest/virtio_ring_inorder.c b/tools/virtio/ringtest/virtio_ring_inorder.c
new file mode 100644
index 000000000..2438ca58a
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_inorder.c
@@ -0,0 +1,2 @@
+#define INORDER 1
+#include "virtio_ring_0_9.c"
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644
index 000000000..84fc2c557
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_poll.c
@@ -0,0 +1,2 @@
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"
diff --git a/tools/virtio/uapi/linux/uio.h b/tools/virtio/uapi/linux/uio.h
new file mode 100644
index 000000000..7230e9002
--- /dev/null
+++ b/tools/virtio/uapi/linux/uio.h
@@ -0,0 +1 @@
+#include <sys/uio.h>
diff --git a/tools/virtio/uapi/linux/virtio_config.h b/tools/virtio/uapi/linux/virtio_config.h
new file mode 100644
index 000000000..4c86675f0
--- /dev/null
+++ b/tools/virtio/uapi/linux/virtio_config.h
@@ -0,0 +1 @@
+#include "../../../../include/uapi/linux/virtio_config.h"
diff --git a/tools/virtio/uapi/linux/virtio_ring.h b/tools/virtio/uapi/linux/virtio_ring.h
new file mode 100644
index 000000000..cf50b2e5f
--- /dev/null
+++ b/tools/virtio/uapi/linux/virtio_ring.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef VIRTIO_RING_H
+#define VIRTIO_RING_H
+#include "../../../../include/uapi/linux/virtio_ring.h"
+#endif /* VIRTIO_RING_H */
diff --git a/tools/virtio/uapi/linux/virtio_types.h b/tools/virtio/uapi/linux/virtio_types.h
new file mode 100644
index 000000000..e7a1096e7
--- /dev/null
+++ b/tools/virtio/uapi/linux/virtio_types.h
@@ -0,0 +1 @@
+#include "../../include/uapi/linux/virtio_types.h"
diff --git a/tools/virtio/vhost_test/Makefile b/tools/virtio/vhost_test/Makefile
new file mode 100644
index 000000000..a1d35b81b
--- /dev/null
+++ b/tools/virtio/vhost_test/Makefile
@@ -0,0 +1,2 @@
+obj-m += vhost_test.o
+EXTRA_CFLAGS += -Idrivers/vhost
diff --git a/tools/virtio/vhost_test/vhost_test.c b/tools/virtio/vhost_test/vhost_test.c
new file mode 100644
index 000000000..18735189e
--- /dev/null
+++ b/tools/virtio/vhost_test/vhost_test.c
@@ -0,0 +1 @@
+#include "test.c"
diff --git a/tools/virtio/virtio-trace/Makefile b/tools/virtio/virtio-trace/Makefile
new file mode 100644
index 000000000..7843ebcda
--- /dev/null
+++ b/tools/virtio/virtio-trace/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+CC = gcc
+CFLAGS = -O2 -Wall -pthread
+
+all: trace-agent
+
+.c.o:
+	$(CC) $(CFLAGS) -c $^ -o $@
+
+trace-agent: trace-agent.o trace-agent-ctl.o trace-agent-rw.o
+	$(CC) $(CFLAGS) -o $@ $^
+
+clean:
+	rm -f *.o trace-agent
diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README
new file mode 100644
index 000000000..b64845b82
--- /dev/null
+++ b/tools/virtio/virtio-trace/README
@@ -0,0 +1,118 @@
+Trace Agent for virtio-trace
+============================
+
+Trace agent is a user tool for sending trace data of a guest to a Host in low
+overhead. Trace agent has the following functions:
+ - splice a page of ring-buffer to read_pipe without memory copying
+ - splice the page from write_pipe to virtio-console without memory copying
+ - write trace data to stdout by using -o option
+ - controlled by start/stop orders from a Host
+
+The trace agent operates as follows:
+ 1) Initialize all structures.
+ 2) Create a read/write thread per CPU. Each thread is bound to a CPU.
+    The read/write threads hold it.
+ 3) A controller thread does poll() for a start order of a host.
+ 4) After the controller of the trace agent receives a start order from a host,
+    the controller wake read/write threads.
+ 5) The read/write threads start to read trace data from ring-buffers and
+    write the data to virtio-serial.
+ 6) If the controller receives a stop order from a host, the read/write threads
+    stop to read trace data.
+
+
+Files
+=====
+
+README: this file
+Makefile: Makefile of trace agent for virtio-trace
+trace-agent.c: includes main function, sets up for operating trace agent
+trace-agent.h: includes all structures and some macros
+trace-agent-ctl.c: includes controller function for read/write threads
+trace-agent-rw.c: includes read/write threads function
+
+
+Setup
+=====
+
+To use this trace agent for virtio-trace, we need to prepare some virtio-serial
+I/Fs.
+
+1) Make FIFO in a host
+ virtio-trace uses virtio-serial pipe as trace data paths as to the number
+of CPUs and a control path, so FIFO (named pipe) should be created as follows:
+	# mkdir /tmp/virtio-trace/
+	# mkfifo /tmp/virtio-trace/trace-path-cpu{0,1,2,...,X}.{in,out}
+	# mkfifo /tmp/virtio-trace/agent-ctl-path.{in,out}
+
+For example, if a guest use three CPUs, the names are
+	trace-path-cpu{0,1,2}.{in.out}
+and
+	agent-ctl-path.{in,out}.
+
+2) Set up of virtio-serial pipe in a host
+ Add qemu option to use virtio-serial pipe.
+
+ ##virtio-serial device##
+     -device virtio-serial-pci,id=virtio-serial0\
+ ##control path##
+     -chardev pipe,id=charchannel0,path=/tmp/virtio-trace/agent-ctl-path\
+     -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,\
+      id=channel0,name=agent-ctl-path\
+ ##data path##
+     -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\
+     -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel0,\
+      id=channel1,name=trace-path-cpu0\
+      ...
+
+If you manage guests with libvirt, add the following tags to domain XML files.
+Then, libvirt passes the same command option to qemu.
+
+	<channel type='pipe'>
+	   <source path='/tmp/virtio-trace/agent-ctl-path'/>
+	   <target type='virtio' name='agent-ctl-path'/>
+	   <address type='virtio-serial' controller='0' bus='0' port='0'/>
+	</channel>
+	<channel type='pipe'>
+	   <source path='/tmp/virtio-trace/trace-path-cpu0'/>
+	   <target type='virtio' name='trace-path-cpu0'/>
+	   <address type='virtio-serial' controller='0' bus='0' port='1'/>
+	</channel>
+	...
+Here, chardev names are restricted to trace-path-cpuX and agent-ctl-path. For
+example, if a guest use three CPUs, chardev names should be trace-path-cpu0,
+trace-path-cpu1, trace-path-cpu2, and agent-ctl-path.
+
+3) Boot the guest
+ You can find some chardev in /dev/virtio-ports/ in the guest.
+
+
+Run
+===
+
+0) Build trace agent in a guest
+	$ make
+
+1) Enable ftrace in the guest
+ <Example>
+	# echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+
+2) Run trace agent in the guest
+ This agent must be operated as root.
+	# ./trace-agent
+read/write threads in the agent wait for start order from host. If you add -o
+option, trace data are output via stdout in the guest.
+
+3) Open FIFO in a host
+	# cat /tmp/virtio-trace/trace-path-cpu0.out
+If a host does not open these, trace data get stuck in buffers of virtio. Then,
+the guest will stop by specification of chardev in QEMU. This blocking mode may
+be solved in the future.
+
+4) Start to read trace data by ordering from a host
+ A host injects read start order to the guest via virtio-serial.
+	# echo 1 > /tmp/virtio-trace/agent-ctl-path.in
+
+5) Stop to read trace data by ordering from a host
+ A host injects read stop order to the guest via virtio-serial.
+	# echo 0 > /tmp/virtio-trace/agent-ctl-path.in
diff --git a/tools/virtio/virtio-trace/trace-agent-ctl.c b/tools/virtio/virtio-trace/trace-agent-ctl.c
new file mode 100644
index 000000000..a2d0403c4
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent-ctl.c
@@ -0,0 +1,137 @@
+/*
+ * Controller of read/write threads for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ *            Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define HOST_MSG_SIZE		256
+#define EVENT_WAIT_MSEC		100
+
+static volatile sig_atomic_t global_signal_val;
+bool global_sig_receive;	/* default false */
+bool global_run_operation;	/* default false*/
+
+/* Handle SIGTERM/SIGINT/SIGQUIT to exit */
+static void signal_handler(int sig)
+{
+	global_signal_val = sig;
+}
+
+int rw_ctl_init(const char *ctl_path)
+{
+	int ctl_fd;
+
+	ctl_fd = open(ctl_path, O_RDONLY);
+	if (ctl_fd == -1) {
+		pr_err("Cannot open ctl_fd\n");
+		goto error;
+	}
+
+	return ctl_fd;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static int wait_order(int ctl_fd)
+{
+	struct pollfd poll_fd;
+	int ret = 0;
+
+	while (!global_sig_receive) {
+		poll_fd.fd = ctl_fd;
+		poll_fd.events = POLLIN;
+
+		ret = poll(&poll_fd, 1, EVENT_WAIT_MSEC);
+
+		if (global_signal_val) {
+			global_sig_receive = true;
+			pr_info("Receive interrupt %d\n", global_signal_val);
+
+			/* Wakes rw-threads when they are sleeping */
+			if (!global_run_operation)
+				pthread_cond_broadcast(&cond_wakeup);
+
+			ret = -1;
+			break;
+		}
+
+		if (ret < 0) {
+			pr_err("Polling error\n");
+			goto error;
+		}
+
+		if (ret)
+			break;
+	};
+
+	return ret;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * contol read/write threads by handling global_run_operation
+ */
+void *rw_ctl_loop(int ctl_fd)
+{
+	ssize_t rlen;
+	char buf[HOST_MSG_SIZE];
+	int ret;
+
+	/* Setup signal handlers */
+	signal(SIGTERM, signal_handler);
+	signal(SIGINT, signal_handler);
+	signal(SIGQUIT, signal_handler);
+
+	while (!global_sig_receive) {
+
+		ret = wait_order(ctl_fd);
+		if (ret < 0)
+			break;
+
+		rlen = read(ctl_fd, buf, sizeof(buf));
+		if (rlen < 0) {
+			pr_err("read data error in ctl thread\n");
+			goto error;
+		}
+
+		if (rlen == 2 && buf[0] == '1') {
+			/*
+			 * If host writes '1' to a control path,
+			 * this controller wakes all read/write threads.
+			 */
+			global_run_operation = true;
+			pthread_cond_broadcast(&cond_wakeup);
+			pr_debug("Wake up all read/write threads\n");
+		} else if (rlen == 2 && buf[0] == '0') {
+			/*
+			 * If host writes '0' to a control path, read/write
+			 * threads will wait for notification from Host.
+			 */
+			global_run_operation = false;
+			pr_debug("Stop all read/write threads\n");
+		} else
+			pr_info("Invalid host notification: %s\n", buf);
+	}
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c
new file mode 100644
index 000000000..3aace5ea4
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent-rw.c
@@ -0,0 +1,192 @@
+/*
+ * Read/write thread of a guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ *            Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include "trace-agent.h"
+
+#define READ_WAIT_USEC	100000
+
+void *rw_thread_info_new(void)
+{
+	struct rw_thread_info *rw_ti;
+
+	rw_ti = zalloc(sizeof(struct rw_thread_info));
+	if (rw_ti == NULL) {
+		pr_err("rw_thread_info zalloc error\n");
+		exit(EXIT_FAILURE);
+	}
+
+	rw_ti->cpu_num = -1;
+	rw_ti->in_fd = -1;
+	rw_ti->out_fd = -1;
+	rw_ti->read_pipe = -1;
+	rw_ti->write_pipe = -1;
+	rw_ti->pipe_size = PIPE_INIT;
+
+	return rw_ti;
+}
+
+void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+				bool stdout_flag, unsigned long pipe_size,
+				struct rw_thread_info *rw_ti)
+{
+	int data_pipe[2];
+
+	rw_ti->cpu_num = cpu;
+
+	/* set read(input) fd */
+	rw_ti->in_fd = open(in_path, O_RDONLY);
+	if (rw_ti->in_fd == -1) {
+		pr_err("Could not open in_fd (CPU:%d)\n", cpu);
+		goto error;
+	}
+
+	/* set write(output) fd */
+	if (!stdout_flag) {
+		/* virtio-serial output mode */
+		rw_ti->out_fd = open(out_path, O_WRONLY);
+		if (rw_ti->out_fd == -1) {
+			pr_err("Could not open out_fd (CPU:%d)\n", cpu);
+			goto error;
+		}
+	} else
+		/* stdout mode */
+		rw_ti->out_fd = STDOUT_FILENO;
+
+	if (pipe2(data_pipe, O_NONBLOCK) < 0) {
+		pr_err("Could not create pipe in rw-thread(%d)\n", cpu);
+		goto error;
+	}
+
+	/*
+	 * Size of pipe is 64kB in default based on fs/pipe.c.
+	 * To read/write trace data speedy, pipe size is changed.
+	 */
+	if (fcntl(*data_pipe, F_SETPIPE_SZ, pipe_size) < 0) {
+		pr_err("Could not change pipe size in rw-thread(%d)\n", cpu);
+		goto error;
+	}
+
+	rw_ti->read_pipe = data_pipe[1];
+	rw_ti->write_pipe = data_pipe[0];
+	rw_ti->pipe_size = pipe_size;
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+/* Bind a thread to a cpu */
+static void bind_cpu(int cpu_num)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu_num, &mask);
+
+	/* bind my thread to cpu_num by assigning zero to the first argument */
+	if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
+		pr_err("Could not set CPU#%d affinity\n", (int)cpu_num);
+}
+
+static void *rw_thread_main(void *thread_info)
+{
+	ssize_t rlen, wlen;
+	ssize_t ret;
+	struct rw_thread_info *ts = (struct rw_thread_info *)thread_info;
+
+	bind_cpu(ts->cpu_num);
+
+	while (1) {
+		/* Wait for a read order of trace data by Host OS */
+		if (!global_run_operation) {
+			pthread_mutex_lock(&mutex_notify);
+			pthread_cond_wait(&cond_wakeup, &mutex_notify);
+			pthread_mutex_unlock(&mutex_notify);
+		}
+
+		if (global_sig_receive)
+			break;
+
+		/*
+		 * Each thread read trace_pipe_raw of each cpu bounding the
+		 * thread, so contention of multi-threads does not occur.
+		 */
+		rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL,
+				ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE);
+
+		if (rlen < 0) {
+			pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num);
+			goto error;
+		} else if (rlen == 0) {
+			/*
+			 * If trace data do not exist or are unreadable not
+			 * for exceeding the page size, splice_read returns
+			 * NULL. Then, this waits for being filled the data in a
+			 * ring-buffer.
+			 */
+			usleep(READ_WAIT_USEC);
+			pr_debug("Read retry(cpu:%d)\n", ts->cpu_num);
+			continue;
+		}
+
+		wlen = 0;
+
+		do {
+			ret = splice(ts->write_pipe, NULL, ts->out_fd, NULL,
+					rlen - wlen,
+					SPLICE_F_MOVE | SPLICE_F_MORE);
+
+			if (ret < 0) {
+				pr_err("Splice_write in rw-thread(%d)\n",
+								ts->cpu_num);
+				goto error;
+			} else if (ret == 0)
+				/*
+				 * When host reader is not in time for reading
+				 * trace data, guest will be stopped. This is
+				 * because char dev in QEMU is not supported
+				 * non-blocking mode. Then, writer might be
+				 * sleep in that case.
+				 * This sleep will be removed by supporting
+				 * non-blocking mode.
+				 */
+				sleep(1);
+			wlen += ret;
+		} while (wlen < rlen);
+	}
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+
+pthread_t rw_thread_run(struct rw_thread_info *rw_ti)
+{
+	int ret;
+	pthread_t rw_thread_per_cpu;
+
+	ret = pthread_create(&rw_thread_per_cpu, NULL, rw_thread_main, rw_ti);
+	if (ret != 0) {
+		pr_err("Could not create a rw thread(%d)\n", rw_ti->cpu_num);
+		exit(EXIT_FAILURE);
+	}
+
+	return rw_thread_per_cpu;
+}
diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c
new file mode 100644
index 000000000..0a0a7dd4e
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.c
@@ -0,0 +1,270 @@
+/*
+ * Guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ *            Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define PAGE_SIZE		(sysconf(_SC_PAGE_SIZE))
+#define PIPE_DEF_BUFS		16
+#define PIPE_MIN_SIZE		(PAGE_SIZE*PIPE_DEF_BUFS)
+#define PIPE_MAX_SIZE		(1024*1024)
+#define READ_PATH_FMT	\
+		"/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
+#define WRITE_PATH_FMT		"/dev/virtio-ports/trace-path-cpu%d"
+#define CTL_PATH		"/dev/virtio-ports/agent-ctl-path"
+
+pthread_mutex_t mutex_notify = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond_wakeup = PTHREAD_COND_INITIALIZER;
+
+static int get_total_cpus(void)
+{
+	int nr_cpus = (int)sysconf(_SC_NPROCESSORS_CONF);
+
+	if (nr_cpus <= 0) {
+		pr_err("Could not read cpus\n");
+		goto error;
+	} else if (nr_cpus > MAX_CPUS) {
+		pr_err("Exceed max cpus(%d)\n", (int)MAX_CPUS);
+		goto error;
+	}
+
+	return nr_cpus;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static void *agent_info_new(void)
+{
+	struct agent_info *s;
+	int i;
+
+	s = zalloc(sizeof(struct agent_info));
+	if (s == NULL) {
+		pr_err("agent_info zalloc error\n");
+		exit(EXIT_FAILURE);
+	}
+
+	s->pipe_size = PIPE_INIT;
+	s->use_stdout = false;
+	s->cpus = get_total_cpus();
+	s->ctl_fd = -1;
+
+	/* read/write threads init */
+	for (i = 0; i < s->cpus; i++)
+		s->rw_ti[i] = rw_thread_info_new();
+
+	return s;
+}
+
+static unsigned long parse_size(const char *arg)
+{
+	unsigned long value, round;
+	char *ptr;
+
+	value = strtoul(arg, &ptr, 10);
+	switch (*ptr) {
+	case 'K': case 'k':
+		value <<= 10;
+		break;
+	case 'M': case 'm':
+		value <<= 20;
+		break;
+	default:
+		break;
+	}
+
+	if (value > PIPE_MAX_SIZE) {
+		pr_err("Pipe size must be less than 1MB\n");
+		goto error;
+	} else if (value < PIPE_MIN_SIZE) {
+		pr_err("Pipe size must be over 64KB\n");
+		goto error;
+	}
+
+	/* Align buffer size with page unit */
+	round = value & (PAGE_SIZE - 1);
+	value = value - round;
+
+	return value;
+error:
+	return 0;
+}
+
+static void usage(char const *prg)
+{
+	pr_err("usage: %s [-h] [-o] [-s <size of pipe>]\n", prg);
+}
+
+static const char *make_path(int cpu_num, bool this_is_write_path)
+{
+	int ret;
+	char *buf;
+
+	buf = zalloc(PATH_MAX);
+	if (buf == NULL) {
+		pr_err("Could not allocate buffer\n");
+		goto error;
+	}
+
+	if (this_is_write_path)
+		/* write(output) path */
+		ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
+	else
+		/* read(input) path */
+		ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+
+	if (ret <= 0) {
+		pr_err("Failed to generate %s path(CPU#%d):%d\n",
+			this_is_write_path ? "read" : "write", cpu_num, ret);
+		goto error;
+	}
+
+	return buf;
+
+error:
+	free(buf);
+	return NULL;
+}
+
+static const char *make_input_path(int cpu_num)
+{
+	return make_path(cpu_num, false);
+}
+
+static const char *make_output_path(int cpu_num)
+{
+	return make_path(cpu_num, true);
+}
+
+static void *agent_info_init(struct agent_info *s)
+{
+	int cpu;
+	const char *in_path = NULL;
+	const char *out_path = NULL;
+
+	/* init read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++) {
+		/* set read(input) path per read/write thread */
+		in_path = make_input_path(cpu);
+		if (in_path == NULL)
+			goto error;
+
+		/* set write(output) path per read/write thread*/
+		if (!s->use_stdout) {
+			out_path = make_output_path(cpu);
+			if (out_path == NULL)
+				goto error;
+		} else
+			/* stdout mode */
+			pr_debug("stdout mode\n");
+
+		rw_thread_init(cpu, in_path, out_path, s->use_stdout,
+						s->pipe_size, s->rw_ti[cpu]);
+	}
+
+	/* init controller of read/write threads */
+	s->ctl_fd = rw_ctl_init((const char *)CTL_PATH);
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static void *parse_args(int argc, char *argv[], struct agent_info *s)
+{
+	int cmd;
+	unsigned long size;
+
+	while ((cmd = getopt(argc, argv, "hos:")) != -1) {
+		switch (cmd) {
+		/* stdout mode */
+		case 'o':
+			s->use_stdout = true;
+			break;
+		/* size of pipe */
+		case 's':
+			size = parse_size(optarg);
+			if (size == 0)
+				goto error;
+			s->pipe_size = size;
+			break;
+		case 'h':
+		default:
+			usage(argv[0]);
+			goto error;
+		}
+	}
+
+	agent_info_init(s);
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static void agent_main_loop(struct agent_info *s)
+{
+	int cpu;
+	pthread_t rw_thread_per_cpu[MAX_CPUS];
+
+	/* Start all read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++)
+		rw_thread_per_cpu[cpu] = rw_thread_run(s->rw_ti[cpu]);
+
+	rw_ctl_loop(s->ctl_fd);
+
+	/* Finish all read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++) {
+		int ret;
+
+		ret = pthread_join(rw_thread_per_cpu[cpu], NULL);
+		if (ret != 0) {
+			pr_err("pthread_join() error:%d (cpu %d)\n", ret, cpu);
+			exit(EXIT_FAILURE);
+		}
+	}
+}
+
+static void agent_info_free(struct agent_info *s)
+{
+	int i;
+
+	close(s->ctl_fd);
+	for (i = 0; i < s->cpus; i++) {
+		close(s->rw_ti[i]->in_fd);
+		close(s->rw_ti[i]->out_fd);
+		close(s->rw_ti[i]->read_pipe);
+		close(s->rw_ti[i]->write_pipe);
+		free(s->rw_ti[i]);
+	}
+	free(s);
+}
+
+int main(int argc, char *argv[])
+{
+	struct agent_info *s = NULL;
+
+	s = agent_info_new();
+	parse_args(argc, argv, s);
+
+	agent_main_loop(s);
+
+	agent_info_free(s);
+
+	return 0;
+}
diff --git a/tools/virtio/virtio-trace/trace-agent.h b/tools/virtio/virtio-trace/trace-agent.h
new file mode 100644
index 000000000..e67885969
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRACE_AGENT_H__
+#define __TRACE_AGENT_H__
+#include <pthread.h>
+#include <stdbool.h>
+
+#define MAX_CPUS	256
+#define PIPE_INIT       (1024*1024)
+
+/*
+ * agent_info - structure managing total information of guest agent
+ * @pipe_size:	size of pipe (default 1MB)
+ * @use_stdout:	set to true when o option is added (default false)
+ * @cpus:	total number of CPUs
+ * @ctl_fd:	fd of control path, /dev/virtio-ports/agent-ctl-path
+ * @rw_ti:	structure managing information of read/write threads
+ */
+struct agent_info {
+	unsigned long pipe_size;
+	bool use_stdout;
+	int cpus;
+	int ctl_fd;
+	struct rw_thread_info *rw_ti[MAX_CPUS];
+};
+
+/*
+ * rw_thread_info - structure managing a read/write thread a cpu
+ * @cpu_num:	cpu number operating this read/write thread
+ * @in_fd:	fd of reading trace data path in cpu_num
+ * @out_fd:	fd of writing trace data path in cpu_num
+ * @read_pipe:	fd of read pipe
+ * @write_pipe:	fd of write pipe
+ * @pipe_size:	size of pipe (default 1MB)
+ */
+struct rw_thread_info {
+	int cpu_num;
+	int in_fd;
+	int out_fd;
+	int read_pipe;
+	int write_pipe;
+	unsigned long pipe_size;
+};
+
+/* use for stopping rw threads */
+extern bool global_sig_receive;
+
+/* use for notification */
+extern bool global_run_operation;
+extern pthread_mutex_t mutex_notify;
+extern pthread_cond_t cond_wakeup;
+
+/* for controller of read/write threads */
+extern int rw_ctl_init(const char *ctl_path);
+extern void *rw_ctl_loop(int ctl_fd);
+
+/* for trace read/write thread */
+extern void *rw_thread_info_new(void);
+extern void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+			bool stdout_flag, unsigned long pipe_size,
+			struct rw_thread_info *rw_ti);
+extern pthread_t rw_thread_run(struct rw_thread_info *rw_ti);
+
+static inline void *zalloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+#define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+
+#endif /*__TRACE_AGENT_H__*/
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
new file mode 100644
index 000000000..b427def67
--- /dev/null
+++ b/tools/virtio/virtio_test.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <string.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <linux/virtio_types.h>
+#include <linux/vhost.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ring.h>
+#include "../../drivers/vhost/test.h"
+
+/* Unused */
+void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+
+struct vq_info {
+	int kick;
+	int call;
+	int num;
+	int idx;
+	void *ring;
+	/* copy used for control */
+	struct vring vring;
+	struct virtqueue *vq;
+};
+
+struct vdev_info {
+	struct virtio_device vdev;
+	int control;
+	struct pollfd fds[1];
+	struct vq_info vqs[1];
+	int nvqs;
+	void *buf;
+	size_t buf_size;
+	struct vhost_memory *mem;
+};
+
+bool vq_notify(struct virtqueue *vq)
+{
+	struct vq_info *info = vq->priv;
+	unsigned long long v = 1;
+	int r;
+	r = write(info->kick, &v, sizeof v);
+	assert(r == sizeof v);
+	return true;
+}
+
+void vq_callback(struct virtqueue *vq)
+{
+}
+
+
+void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
+{
+	struct vhost_vring_state state = { .index = info->idx };
+	struct vhost_vring_file file = { .index = info->idx };
+	unsigned long long features = dev->vdev.features;
+	struct vhost_vring_addr addr = {
+		.index = info->idx,
+		.desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
+		.avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
+		.used_user_addr = (uint64_t)(unsigned long)info->vring.used,
+	};
+	int r;
+	r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+	assert(r >= 0);
+	state.num = info->vring.num;
+	r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+	assert(r >= 0);
+	state.num = 0;
+	r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+	assert(r >= 0);
+	r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+	assert(r >= 0);
+	file.fd = info->kick;
+	r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+	assert(r >= 0);
+	file.fd = info->call;
+	r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+	assert(r >= 0);
+}
+
+static void vq_info_add(struct vdev_info *dev, int num)
+{
+	struct vq_info *info = &dev->vqs[dev->nvqs];
+	int r;
+	info->idx = dev->nvqs;
+	info->kick = eventfd(0, EFD_NONBLOCK);
+	info->call = eventfd(0, EFD_NONBLOCK);
+	r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
+	assert(r >= 0);
+	memset(info->ring, 0, vring_size(num, 4096));
+	vring_init(&info->vring, num, info->ring, 4096);
+	info->vq = vring_new_virtqueue(info->idx,
+				       info->vring.num, 4096, &dev->vdev,
+				       true, false, info->ring,
+				       vq_notify, vq_callback, "test");
+	assert(info->vq);
+	info->vq->priv = info;
+	vhost_vq_setup(dev, info);
+	dev->fds[info->idx].fd = info->call;
+	dev->fds[info->idx].events = POLLIN;
+	dev->nvqs++;
+}
+
+static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
+{
+	int r;
+	memset(dev, 0, sizeof *dev);
+	dev->vdev.features = features;
+	dev->buf_size = 1024;
+	dev->buf = malloc(dev->buf_size);
+	assert(dev->buf);
+        dev->control = open("/dev/vhost-test", O_RDWR);
+	assert(dev->control >= 0);
+	r = ioctl(dev->control, VHOST_SET_OWNER, NULL);
+	assert(r >= 0);
+	dev->mem = malloc(offsetof(struct vhost_memory, regions) +
+			  sizeof dev->mem->regions[0]);
+	assert(dev->mem);
+	memset(dev->mem, 0, offsetof(struct vhost_memory, regions) +
+                          sizeof dev->mem->regions[0]);
+	dev->mem->nregions = 1;
+	dev->mem->regions[0].guest_phys_addr = (long)dev->buf;
+	dev->mem->regions[0].userspace_addr = (long)dev->buf;
+	dev->mem->regions[0].memory_size = dev->buf_size;
+	r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+	assert(r >= 0);
+}
+
+/* TODO: this is pretty bad: we get a cache line bounce
+ * for the wait queue on poll and another one on read,
+ * plus the read which is there just to clear the
+ * current state. */
+static void wait_for_interrupt(struct vdev_info *dev)
+{
+	int i;
+	unsigned long long val;
+	poll(dev->fds, dev->nvqs, -1);
+	for (i = 0; i < dev->nvqs; ++i)
+		if (dev->fds[i].revents & POLLIN) {
+			read(dev->fds[i].fd, &val, sizeof val);
+		}
+}
+
+static void run_test(struct vdev_info *dev, struct vq_info *vq,
+		     bool delayed, int bufs)
+{
+	struct scatterlist sl;
+	long started = 0, completed = 0;
+	long completed_before;
+	int r, test = 1;
+	unsigned len;
+	long long spurious = 0;
+	r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+	assert(r >= 0);
+	for (;;) {
+		virtqueue_disable_cb(vq->vq);
+		completed_before = completed;
+		do {
+			if (started < bufs) {
+				sg_init_one(&sl, dev->buf, dev->buf_size);
+				r = virtqueue_add_outbuf(vq->vq, &sl, 1,
+							 dev->buf + started,
+							 GFP_ATOMIC);
+				if (likely(r == 0)) {
+					++started;
+					if (unlikely(!virtqueue_kick(vq->vq)))
+						r = -1;
+				}
+			} else
+				r = -1;
+
+			/* Flush out completed bufs if any */
+			if (virtqueue_get_buf(vq->vq, &len)) {
+				++completed;
+				r = 0;
+			}
+
+		} while (r == 0);
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		assert(started <= bufs);
+		if (completed == bufs)
+			break;
+		if (delayed) {
+			if (virtqueue_enable_cb_delayed(vq->vq))
+				wait_for_interrupt(dev);
+		} else {
+			if (virtqueue_enable_cb(vq->vq))
+				wait_for_interrupt(dev);
+		}
+	}
+	test = 0;
+	r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+	assert(r >= 0);
+	fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious);
+}
+
+const char optstring[] = "h";
+const struct option longopts[] = {
+	{
+		.name = "help",
+		.val = 'h',
+	},
+	{
+		.name = "event-idx",
+		.val = 'E',
+	},
+	{
+		.name = "no-event-idx",
+		.val = 'e',
+	},
+	{
+		.name = "indirect",
+		.val = 'I',
+	},
+	{
+		.name = "no-indirect",
+		.val = 'i',
+	},
+	{
+		.name = "virtio-1",
+		.val = '1',
+	},
+	{
+		.name = "no-virtio-1",
+		.val = '0',
+	},
+	{
+		.name = "delayed-interrupt",
+		.val = 'D',
+	},
+	{
+		.name = "no-delayed-interrupt",
+		.val = 'd',
+	},
+	{
+	}
+};
+
+static void help(void)
+{
+	fprintf(stderr, "Usage: virtio_test [--help]"
+		" [--no-indirect]"
+		" [--no-event-idx]"
+		" [--no-virtio-1]"
+		" [--delayed-interrupt]"
+		"\n");
+}
+
+int main(int argc, char **argv)
+{
+	struct vdev_info dev;
+	unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
+		(1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
+	int o;
+	bool delayed = false;
+
+	for (;;) {
+		o = getopt_long(argc, argv, optstring, longopts, NULL);
+		switch (o) {
+		case -1:
+			goto done;
+		case '?':
+			help();
+			exit(2);
+		case 'e':
+			features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
+			break;
+		case 'h':
+			help();
+			goto done;
+		case 'i':
+			features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
+			break;
+		case '0':
+			features &= ~(1ULL << VIRTIO_F_VERSION_1);
+			break;
+		case 'D':
+			delayed = true;
+			break;
+		default:
+			assert(0);
+			break;
+		}
+	}
+
+done:
+	vdev_info_init(&dev, features);
+	vq_info_add(&dev, 256);
+	run_test(&dev, &dev.vqs[0], delayed, 0x100000);
+	return 0;
+}
diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
new file mode 100644
index 000000000..293653463
--- /dev/null
+++ b/tools/virtio/vringh_test.c
@@ -0,0 +1,751 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Simple test of virtio code, entirely in userpsace. */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <err.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/virtio.h>
+#include <linux/vringh.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
+#include <linux/uaccess.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+
+#define USER_MEM (1024*1024)
+void *__user_addr_min, *__user_addr_max;
+void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+static u64 user_addr_offset;
+
+#define RINGSIZE 256
+#define ALIGN 4096
+
+static bool never_notify_host(struct virtqueue *vq)
+{
+	abort();
+}
+
+static void never_callback_guest(struct virtqueue *vq)
+{
+	abort();
+}
+
+static bool getrange_iov(struct vringh *vrh, u64 addr, struct vringh_range *r)
+{
+	if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
+		return false;
+	if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
+		return false;
+
+	r->start = (u64)(unsigned long)__user_addr_min - user_addr_offset;
+	r->end_incl = (u64)(unsigned long)__user_addr_max - 1 - user_addr_offset;
+	r->offset = user_addr_offset;
+	return true;
+}
+
+/* We return single byte ranges. */
+static bool getrange_slow(struct vringh *vrh, u64 addr, struct vringh_range *r)
+{
+	if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
+		return false;
+	if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
+		return false;
+
+	r->start = addr;
+	r->end_incl = r->start;
+	r->offset = user_addr_offset;
+	return true;
+}
+
+struct guest_virtio_device {
+	struct virtio_device vdev;
+	int to_host_fd;
+	unsigned long notifies;
+};
+
+static bool parallel_notify_host(struct virtqueue *vq)
+{
+	int rc;
+	struct guest_virtio_device *gvdev;
+
+	gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev);
+	rc = write(gvdev->to_host_fd, "", 1);
+	if (rc < 0)
+		return false;
+	gvdev->notifies++;
+	return true;
+}
+
+static bool no_notify_host(struct virtqueue *vq)
+{
+	return true;
+}
+
+#define NUM_XFERS (10000000)
+
+/* We aim for two "distant" cpus. */
+static void find_cpus(unsigned int *first, unsigned int *last)
+{
+	unsigned int i;
+
+	*first = -1U;
+	*last = 0;
+	for (i = 0; i < 4096; i++) {
+		cpu_set_t set;
+		CPU_ZERO(&set);
+		CPU_SET(i, &set);
+		if (sched_setaffinity(getpid(), sizeof(set), &set) == 0) {
+			if (i < *first)
+				*first = i;
+			if (i > *last)
+				*last = i;
+		}
+	}
+}
+
+/* Opencoded version for fast mode */
+static inline int vringh_get_head(struct vringh *vrh, u16 *head)
+{
+	u16 avail_idx, i;
+	int err;
+
+	err = get_user(avail_idx, &vrh->vring.avail->idx);
+	if (err)
+		return err;
+
+	if (vrh->last_avail_idx == avail_idx)
+		return 0;
+
+	/* Only get avail ring entries after they have been exposed by guest. */
+	virtio_rmb(vrh->weak_barriers);
+
+	i = vrh->last_avail_idx & (vrh->vring.num - 1);
+
+	err = get_user(*head, &vrh->vring.avail->ring[i]);
+	if (err)
+		return err;
+
+	vrh->last_avail_idx++;
+	return 1;
+}
+
+static int parallel_test(u64 features,
+			 bool (*getrange)(struct vringh *vrh,
+					  u64 addr, struct vringh_range *r),
+			 bool fast_vringh)
+{
+	void *host_map, *guest_map;
+	int fd, mapsize, to_guest[2], to_host[2];
+	unsigned long xfers = 0, notifies = 0, receives = 0;
+	unsigned int first_cpu, last_cpu;
+	cpu_set_t cpu_set;
+	char buf[128];
+
+	/* Create real file to mmap. */
+	fd = open("/tmp/vringh_test-file", O_RDWR|O_CREAT|O_TRUNC, 0600);
+	if (fd < 0)
+		err(1, "Opening /tmp/vringh_test-file");
+
+	/* Extra room at the end for some data, and indirects */
+	mapsize = vring_size(RINGSIZE, ALIGN)
+		+ RINGSIZE * 2 * sizeof(int)
+		+ RINGSIZE * 6 * sizeof(struct vring_desc);
+	mapsize = (mapsize + getpagesize() - 1) & ~(getpagesize() - 1);
+	ftruncate(fd, mapsize);
+
+	/* Parent and child use separate addresses, to check our mapping logic! */
+	host_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+	guest_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+
+	pipe(to_guest);
+	pipe(to_host);
+
+	CPU_ZERO(&cpu_set);
+	find_cpus(&first_cpu, &last_cpu);
+	printf("Using CPUS %u and %u\n", first_cpu, last_cpu);
+	fflush(stdout);
+
+	if (fork() != 0) {
+		struct vringh vrh;
+		int status, err, rlen = 0;
+		char rbuf[5];
+
+		/* We are the host: never access guest addresses! */
+		munmap(guest_map, mapsize);
+
+		__user_addr_min = host_map;
+		__user_addr_max = __user_addr_min + mapsize;
+		user_addr_offset = host_map - guest_map;
+		assert(user_addr_offset);
+
+		close(to_guest[0]);
+		close(to_host[1]);
+
+		vring_init(&vrh.vring, RINGSIZE, host_map, ALIGN);
+		vringh_init_user(&vrh, features, RINGSIZE, true,
+				 vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
+		CPU_SET(first_cpu, &cpu_set);
+		if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
+			errx(1, "Could not set affinity to cpu %u", first_cpu);
+
+		while (xfers < NUM_XFERS) {
+			struct iovec host_riov[2], host_wiov[2];
+			struct vringh_iov riov, wiov;
+			u16 head, written;
+
+			if (fast_vringh) {
+				for (;;) {
+					err = vringh_get_head(&vrh, &head);
+					if (err != 0)
+						break;
+					err = vringh_need_notify_user(&vrh);
+					if (err < 0)
+						errx(1, "vringh_need_notify_user: %i",
+						     err);
+					if (err) {
+						write(to_guest[1], "", 1);
+						notifies++;
+					}
+				}
+				if (err != 1)
+					errx(1, "vringh_get_head");
+				written = 0;
+				goto complete;
+			} else {
+				vringh_iov_init(&riov,
+						host_riov,
+						ARRAY_SIZE(host_riov));
+				vringh_iov_init(&wiov,
+						host_wiov,
+						ARRAY_SIZE(host_wiov));
+
+				err = vringh_getdesc_user(&vrh, &riov, &wiov,
+							  getrange, &head);
+			}
+			if (err == 0) {
+				err = vringh_need_notify_user(&vrh);
+				if (err < 0)
+					errx(1, "vringh_need_notify_user: %i",
+					     err);
+				if (err) {
+					write(to_guest[1], "", 1);
+					notifies++;
+				}
+
+				if (!vringh_notify_enable_user(&vrh))
+					continue;
+
+				/* Swallow all notifies at once. */
+				if (read(to_host[0], buf, sizeof(buf)) < 1)
+					break;
+
+				vringh_notify_disable_user(&vrh);
+				receives++;
+				continue;
+			}
+			if (err != 1)
+				errx(1, "vringh_getdesc_user: %i", err);
+
+			/* We simply copy bytes. */
+			if (riov.used) {
+				rlen = vringh_iov_pull_user(&riov, rbuf,
+							    sizeof(rbuf));
+				if (rlen != 4)
+					errx(1, "vringh_iov_pull_user: %i",
+					     rlen);
+				assert(riov.i == riov.used);
+				written = 0;
+			} else {
+				err = vringh_iov_push_user(&wiov, rbuf, rlen);
+				if (err != rlen)
+					errx(1, "vringh_iov_push_user: %i",
+					     err);
+				assert(wiov.i == wiov.used);
+				written = err;
+			}
+		complete:
+			xfers++;
+
+			err = vringh_complete_user(&vrh, head, written);
+			if (err != 0)
+				errx(1, "vringh_complete_user: %i", err);
+		}
+
+		err = vringh_need_notify_user(&vrh);
+		if (err < 0)
+			errx(1, "vringh_need_notify_user: %i", err);
+		if (err) {
+			write(to_guest[1], "", 1);
+			notifies++;
+		}
+		wait(&status);
+		if (!WIFEXITED(status))
+			errx(1, "Child died with signal %i?", WTERMSIG(status));
+		if (WEXITSTATUS(status) != 0)
+			errx(1, "Child exited %i?", WEXITSTATUS(status));
+		printf("Host: notified %lu, pinged %lu\n", notifies, receives);
+		return 0;
+	} else {
+		struct guest_virtio_device gvdev;
+		struct virtqueue *vq;
+		unsigned int *data;
+		struct vring_desc *indirects;
+		unsigned int finished = 0;
+
+		/* We pass sg[]s pointing into here, but we need RINGSIZE+1 */
+		data = guest_map + vring_size(RINGSIZE, ALIGN);
+		indirects = (void *)data + (RINGSIZE + 1) * 2 * sizeof(int);
+
+		/* We are the guest. */
+		munmap(host_map, mapsize);
+
+		close(to_guest[1]);
+		close(to_host[0]);
+
+		gvdev.vdev.features = features;
+		gvdev.to_host_fd = to_host[1];
+		gvdev.notifies = 0;
+
+		CPU_SET(first_cpu, &cpu_set);
+		if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
+			err(1, "Could not set affinity to cpu %u", first_cpu);
+
+		vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true,
+					 false, guest_map,
+					 fast_vringh ? no_notify_host
+					 : parallel_notify_host,
+					 never_callback_guest, "guest vq");
+
+		/* Don't kfree indirects. */
+		__kfree_ignore_start = indirects;
+		__kfree_ignore_end = indirects + RINGSIZE * 6;
+
+		while (xfers < NUM_XFERS) {
+			struct scatterlist sg[4];
+			unsigned int num_sg, len;
+			int *dbuf, err;
+			bool output = !(xfers % 2);
+
+			/* Consume bufs. */
+			while ((dbuf = virtqueue_get_buf(vq, &len)) != NULL) {
+				if (len == 4)
+					assert(*dbuf == finished - 1);
+				else if (!fast_vringh)
+					assert(*dbuf == finished);
+				finished++;
+			}
+
+			/* Produce a buffer. */
+			dbuf = data + (xfers % (RINGSIZE + 1));
+
+			if (output)
+				*dbuf = xfers;
+			else
+				*dbuf = -1;
+
+			switch ((xfers / sizeof(*dbuf)) % 4) {
+			case 0:
+				/* Nasty three-element sg list. */
+				sg_init_table(sg, num_sg = 3);
+				sg_set_buf(&sg[0], (void *)dbuf, 1);
+				sg_set_buf(&sg[1], (void *)dbuf + 1, 2);
+				sg_set_buf(&sg[2], (void *)dbuf + 3, 1);
+				break;
+			case 1:
+				sg_init_table(sg, num_sg = 2);
+				sg_set_buf(&sg[0], (void *)dbuf, 1);
+				sg_set_buf(&sg[1], (void *)dbuf + 1, 3);
+				break;
+			case 2:
+				sg_init_table(sg, num_sg = 1);
+				sg_set_buf(&sg[0], (void *)dbuf, 4);
+				break;
+			case 3:
+				sg_init_table(sg, num_sg = 4);
+				sg_set_buf(&sg[0], (void *)dbuf, 1);
+				sg_set_buf(&sg[1], (void *)dbuf + 1, 1);
+				sg_set_buf(&sg[2], (void *)dbuf + 2, 1);
+				sg_set_buf(&sg[3], (void *)dbuf + 3, 1);
+				break;
+			}
+
+			/* May allocate an indirect, so force it to allocate
+			 * user addr */
+			__kmalloc_fake = indirects + (xfers % RINGSIZE) * 4;
+			if (output)
+				err = virtqueue_add_outbuf(vq, sg, num_sg, dbuf,
+							   GFP_KERNEL);
+			else
+				err = virtqueue_add_inbuf(vq, sg, num_sg,
+							  dbuf, GFP_KERNEL);
+
+			if (err == -ENOSPC) {
+				if (!virtqueue_enable_cb_delayed(vq))
+					continue;
+				/* Swallow all notifies at once. */
+				if (read(to_guest[0], buf, sizeof(buf)) < 1)
+					break;
+				
+				receives++;
+				virtqueue_disable_cb(vq);
+				continue;
+			}
+
+			if (err)
+				errx(1, "virtqueue_add_in/outbuf: %i", err);
+
+			xfers++;
+			virtqueue_kick(vq);
+		}
+
+		/* Any extra? */
+		while (finished != xfers) {
+			int *dbuf;
+			unsigned int len;
+
+			/* Consume bufs. */
+			dbuf = virtqueue_get_buf(vq, &len);
+			if (dbuf) {
+				if (len == 4)
+					assert(*dbuf == finished - 1);
+				else
+					assert(len == 0);
+				finished++;
+				continue;
+			}
+
+			if (!virtqueue_enable_cb_delayed(vq))
+				continue;
+			if (read(to_guest[0], buf, sizeof(buf)) < 1)
+				break;
+				
+			receives++;
+			virtqueue_disable_cb(vq);
+		}
+
+		printf("Guest: notified %lu, pinged %lu\n",
+		       gvdev.notifies, receives);
+		vring_del_virtqueue(vq);
+		return 0;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct virtio_device vdev;
+	struct virtqueue *vq;
+	struct vringh vrh;
+	struct scatterlist guest_sg[RINGSIZE], *sgs[2];
+	struct iovec host_riov[2], host_wiov[2];
+	struct vringh_iov riov, wiov;
+	struct vring_used_elem used[RINGSIZE];
+	char buf[28];
+	u16 head;
+	int err;
+	unsigned i;
+	void *ret;
+	bool (*getrange)(struct vringh *vrh, u64 addr, struct vringh_range *r);
+	bool fast_vringh = false, parallel = false;
+
+	getrange = getrange_iov;
+	vdev.features = 0;
+
+	while (argv[1]) {
+		if (strcmp(argv[1], "--indirect") == 0)
+			__virtio_set_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
+		else if (strcmp(argv[1], "--eventidx") == 0)
+			__virtio_set_bit(&vdev, VIRTIO_RING_F_EVENT_IDX);
+		else if (strcmp(argv[1], "--virtio-1") == 0)
+			__virtio_set_bit(&vdev, VIRTIO_F_VERSION_1);
+		else if (strcmp(argv[1], "--slow-range") == 0)
+			getrange = getrange_slow;
+		else if (strcmp(argv[1], "--fast-vringh") == 0)
+			fast_vringh = true;
+		else if (strcmp(argv[1], "--parallel") == 0)
+			parallel = true;
+		else
+			errx(1, "Unknown arg %s", argv[1]);
+		argv++;
+	}
+
+	if (parallel)
+		return parallel_test(vdev.features, getrange, fast_vringh);
+
+	if (posix_memalign(&__user_addr_min, PAGE_SIZE, USER_MEM) != 0)
+		abort();
+	__user_addr_max = __user_addr_min + USER_MEM;
+	memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));
+
+	/* Set up guest side. */
+	vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, false,
+				 __user_addr_min,
+				 never_notify_host, never_callback_guest,
+				 "guest vq");
+
+	/* Set up host side. */
+	vring_init(&vrh.vring, RINGSIZE, __user_addr_min, ALIGN);
+	vringh_init_user(&vrh, vdev.features, RINGSIZE, true,
+			 vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
+
+	/* No descriptor to get yet... */
+	err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+	if (err != 0)
+		errx(1, "vringh_getdesc_user: %i", err);
+
+	/* Guest puts in a descriptor. */
+	memcpy(__user_addr_max - 1, "a", 1);
+	sg_init_table(guest_sg, 1);
+	sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
+	sg_init_table(guest_sg+1, 1);
+	sg_set_buf(&guest_sg[1], __user_addr_max - 3, 2);
+	sgs[0] = &guest_sg[0];
+	sgs[1] = &guest_sg[1];
+
+	/* May allocate an indirect, so force it to allocate user addr */
+	__kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
+	err = virtqueue_add_sgs(vq, sgs, 1, 1, &err, GFP_KERNEL);
+	if (err)
+		errx(1, "virtqueue_add_sgs: %i", err);
+	__kmalloc_fake = NULL;
+
+	/* Host retreives it. */
+	vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+	vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+	err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+	if (err != 1)
+		errx(1, "vringh_getdesc_user: %i", err);
+
+	assert(riov.used == 1);
+	assert(riov.iov[0].iov_base == __user_addr_max - 1);
+	assert(riov.iov[0].iov_len == 1);
+	if (getrange != getrange_slow) {
+		assert(wiov.used == 1);
+		assert(wiov.iov[0].iov_base == __user_addr_max - 3);
+		assert(wiov.iov[0].iov_len == 2);
+	} else {
+		assert(wiov.used == 2);
+		assert(wiov.iov[0].iov_base == __user_addr_max - 3);
+		assert(wiov.iov[0].iov_len == 1);
+		assert(wiov.iov[1].iov_base == __user_addr_max - 2);
+		assert(wiov.iov[1].iov_len == 1);
+	}
+
+	err = vringh_iov_pull_user(&riov, buf, 5);
+	if (err != 1)
+		errx(1, "vringh_iov_pull_user: %i", err);
+	assert(buf[0] == 'a');
+	assert(riov.i == 1);
+	assert(vringh_iov_pull_user(&riov, buf, 5) == 0);
+
+	memcpy(buf, "bcdef", 5);
+	err = vringh_iov_push_user(&wiov, buf, 5);
+	if (err != 2)
+		errx(1, "vringh_iov_push_user: %i", err);
+	assert(memcmp(__user_addr_max - 3, "bc", 2) == 0);
+	assert(wiov.i == wiov.used);
+	assert(vringh_iov_push_user(&wiov, buf, 5) == 0);
+
+	/* Host is done. */
+	err = vringh_complete_user(&vrh, head, err);
+	if (err != 0)
+		errx(1, "vringh_complete_user: %i", err);
+
+	/* Guest should see used token now. */
+	__kfree_ignore_start = __user_addr_min + vring_size(RINGSIZE, ALIGN);
+	__kfree_ignore_end = __kfree_ignore_start + 1;
+	ret = virtqueue_get_buf(vq, &i);
+	if (ret != &err)
+		errx(1, "virtqueue_get_buf: %p", ret);
+	assert(i == 2);
+
+	/* Guest puts in a huge descriptor. */
+	sg_init_table(guest_sg, RINGSIZE);
+	for (i = 0; i < RINGSIZE; i++) {
+		sg_set_buf(&guest_sg[i],
+			   __user_addr_max - USER_MEM/4, USER_MEM/4);
+	}
+
+	/* Fill contents with recognisable garbage. */
+	for (i = 0; i < USER_MEM/4; i++)
+		((char *)__user_addr_max - USER_MEM/4)[i] = i;
+
+	/* This will allocate an indirect, so force it to allocate user addr */
+	__kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
+	err = virtqueue_add_outbuf(vq, guest_sg, RINGSIZE, &err, GFP_KERNEL);
+	if (err)
+		errx(1, "virtqueue_add_outbuf (large): %i", err);
+	__kmalloc_fake = NULL;
+
+	/* Host picks it up (allocates new iov). */
+	vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+	vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+	err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+	if (err != 1)
+		errx(1, "vringh_getdesc_user: %i", err);
+
+	assert(riov.max_num & VRINGH_IOV_ALLOCATED);
+	assert(riov.iov != host_riov);
+	if (getrange != getrange_slow)
+		assert(riov.used == RINGSIZE);
+	else
+		assert(riov.used == RINGSIZE * USER_MEM/4);
+
+	assert(!(wiov.max_num & VRINGH_IOV_ALLOCATED));
+	assert(wiov.used == 0);
+
+	/* Pull data back out (in odd chunks), should be as expected. */
+	for (i = 0; i < RINGSIZE * USER_MEM/4; i += 3) {
+		err = vringh_iov_pull_user(&riov, buf, 3);
+		if (err != 3 && i + err != RINGSIZE * USER_MEM/4)
+			errx(1, "vringh_iov_pull_user large: %i", err);
+		assert(buf[0] == (char)i);
+		assert(err < 2 || buf[1] == (char)(i + 1));
+		assert(err < 3 || buf[2] == (char)(i + 2));
+	}
+	assert(riov.i == riov.used);
+	vringh_iov_cleanup(&riov);
+	vringh_iov_cleanup(&wiov);
+
+	/* Complete using multi interface, just because we can. */
+	used[0].id = head;
+	used[0].len = 0;
+	err = vringh_complete_multi_user(&vrh, used, 1);
+	if (err)
+		errx(1, "vringh_complete_multi_user(1): %i", err);
+
+	/* Free up those descriptors. */
+	ret = virtqueue_get_buf(vq, &i);
+	if (ret != &err)
+		errx(1, "virtqueue_get_buf: %p", ret);
+
+	/* Add lots of descriptors. */
+	sg_init_table(guest_sg, 1);
+	sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
+	for (i = 0; i < RINGSIZE; i++) {
+		err = virtqueue_add_outbuf(vq, guest_sg, 1, &err, GFP_KERNEL);
+		if (err)
+			errx(1, "virtqueue_add_outbuf (multiple): %i", err);
+	}
+
+	/* Now get many, and consume them all at once. */
+	vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+	vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+	for (i = 0; i < RINGSIZE; i++) {
+		err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+		if (err != 1)
+			errx(1, "vringh_getdesc_user: %i", err);
+		used[i].id = head;
+		used[i].len = 0;
+	}
+	/* Make sure it wraps around ring, to test! */
+	assert(vrh.vring.used->idx % RINGSIZE != 0);
+	err = vringh_complete_multi_user(&vrh, used, RINGSIZE);
+	if (err)
+		errx(1, "vringh_complete_multi_user: %i", err);
+
+	/* Free those buffers. */
+	for (i = 0; i < RINGSIZE; i++) {
+		unsigned len;
+		assert(virtqueue_get_buf(vq, &len) != NULL);
+	}
+
+	/* Test weird (but legal!) indirect. */
+	if (__virtio_test_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
+		char *data = __user_addr_max - USER_MEM/4;
+		struct vring_desc *d = __user_addr_max - USER_MEM/2;
+		struct vring vring;
+
+		/* Force creation of direct, which we modify. */
+		__virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
+		vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
+					 false, __user_addr_min,
+					 never_notify_host,
+					 never_callback_guest,
+					 "guest vq");
+
+		sg_init_table(guest_sg, 4);
+		sg_set_buf(&guest_sg[0], d, sizeof(*d)*2);
+		sg_set_buf(&guest_sg[1], d + 2, sizeof(*d)*1);
+		sg_set_buf(&guest_sg[2], data + 6, 4);
+		sg_set_buf(&guest_sg[3], d + 3, sizeof(*d)*3);
+
+		err = virtqueue_add_outbuf(vq, guest_sg, 4, &err, GFP_KERNEL);
+		if (err)
+			errx(1, "virtqueue_add_outbuf (indirect): %i", err);
+
+		vring_init(&vring, RINGSIZE, __user_addr_min, ALIGN);
+
+		/* They're used in order, but double-check... */
+		assert(vring.desc[0].addr == (unsigned long)d);
+		assert(vring.desc[1].addr == (unsigned long)(d+2));
+		assert(vring.desc[2].addr == (unsigned long)data + 6);
+		assert(vring.desc[3].addr == (unsigned long)(d+3));
+		vring.desc[0].flags |= VRING_DESC_F_INDIRECT;
+		vring.desc[1].flags |= VRING_DESC_F_INDIRECT;
+		vring.desc[3].flags |= VRING_DESC_F_INDIRECT;
+
+		/* First indirect */
+		d[0].addr = (unsigned long)data;
+		d[0].len = 1;
+		d[0].flags = VRING_DESC_F_NEXT;
+		d[0].next = 1;
+		d[1].addr = (unsigned long)data + 1;
+		d[1].len = 2;
+		d[1].flags = 0;
+
+		/* Second indirect */
+		d[2].addr = (unsigned long)data + 3;
+		d[2].len = 3;
+		d[2].flags = 0;
+
+		/* Third indirect */
+		d[3].addr = (unsigned long)data + 10;
+		d[3].len = 5;
+		d[3].flags = VRING_DESC_F_NEXT;
+		d[3].next = 1;
+		d[4].addr = (unsigned long)data + 15;
+		d[4].len = 6;
+		d[4].flags = VRING_DESC_F_NEXT;
+		d[4].next = 2;
+		d[5].addr = (unsigned long)data + 21;
+		d[5].len = 7;
+		d[5].flags = 0;
+
+		/* Host picks it up (allocates new iov). */
+		vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+		vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+		err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+		if (err != 1)
+			errx(1, "vringh_getdesc_user: %i", err);
+
+		if (head != 0)
+			errx(1, "vringh_getdesc_user: head %i not 0", head);
+
+		assert(riov.max_num & VRINGH_IOV_ALLOCATED);
+		if (getrange != getrange_slow)
+			assert(riov.used == 7);
+		else
+			assert(riov.used == 28);
+		err = vringh_iov_pull_user(&riov, buf, 29);
+		assert(err == 28);
+
+		/* Data should be linear. */
+		for (i = 0; i < err; i++)
+			assert(buf[i] == i);
+		vringh_iov_cleanup(&riov);
+	}
+
+	/* Don't leak memory... */
+	vring_del_virtqueue(vq);
+	free(__user_addr_min);
+
+	return 0;
+}