author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-06 01:02:30 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-06 01:02:30 +0000
commit     76cb841cb886eef6b3bee341a2266c76578724ad
tree       f5892e5ba6cc11949952a6ce4ecbe6d516d6ce58  /tools/virtio
parent     Initial commit.
Adding upstream version 4.19.249.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
51 files changed, 4102 insertions, 0 deletions
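The diff below builds parts of the kernel's virtio implementation (drivers/virtio, drivers/vhost) as ordinary userspace binaries by shimming the kernel headers they pull in. As a rough sketch of what a test built on these shims can do — hypothetical code, not taken from this patch; the names fake_notify and example_add_one_buf are invented, and it assumes ring_mem points at enough zeroed, page-aligned memory for the ring — the harness drives the virtqueue API declared in tools/virtio/linux/virtio.h like this:

	#include <linux/virtio.h>
	#include <linux/slab.h>

	/* hypothetical no-op host-notification hook for the sketch */
	static bool fake_notify(struct virtqueue *vq)
	{
		return true;
	}

	/* add one outgoing buffer and kick the (fake) host */
	static void example_add_one_buf(struct virtio_device *vdev, void *ring_mem)
	{
		struct virtqueue *vq;
		struct scatterlist sg;
		static char buf[64];

		/* ring_mem: vring_size(256, 4096) zeroed, page-aligned bytes */
		vq = vring_new_virtqueue(0, 256, 4096, vdev,
					 true /* weak_barriers */, false /* ctx */,
					 ring_mem, fake_notify, NULL, "test");
		sg_init_one(&sg, buf, sizeof(buf));
		if (virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_KERNEL) == 0)
			virtqueue_kick(vq);
	}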
diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore new file mode 100644 index 000000000..1cfbb0157 --- /dev/null +++ b/tools/virtio/.gitignore @@ -0,0 +1,3 @@ +*.d +virtio_test +vringh_test diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile new file mode 100644 index 000000000..8e2a90811 --- /dev/null +++ b/tools/virtio/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +all: test mod +test: virtio_test vringh_test +virtio_test: virtio_ring.o virtio_test.o +vringh_test: vringh_test.o vringh.o virtio_ring.o + +CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE +vpath %.c ../../drivers/virtio ../../drivers/vhost +mod: + ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} +.PHONY: all test mod clean +clean: + ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \ + vhost_test/Module.symvers vhost_test/modules.order *.d +-include *.d diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h new file mode 100644 index 000000000..d0351f83a --- /dev/null +++ b/tools/virtio/asm/barrier.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if defined(__i386__) || defined(__x86_64__) +#define barrier() asm volatile("" ::: "memory") +#define virt_mb() __sync_synchronize() +#define virt_rmb() barrier() +#define virt_wmb() barrier() +/* Atomic store should be enough, but gcc generates worse code in that case. */ +#define virt_store_mb(var, value) do { \ + typeof(var) virt_store_mb_value = (value); \ + __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \ + __ATOMIC_SEQ_CST); \ + barrier(); \ +} while (0); +/* Weak barriers should be used. If not - it's a bug */ +# define mb() abort() +# define dma_rmb() abort() +# define dma_wmb() abort() +#else +#error Please fill in barrier macros +#endif + diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h new file mode 100644 index 000000000..b14c2c3b6 --- /dev/null +++ b/tools/virtio/linux/bug.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BUG_H +#define BUG_H + +#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) + +#define BUILD_BUG_ON(x) + +#define BUG() abort() + +#endif /* BUG_H */ diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h new file mode 100644 index 000000000..903dc9c4b --- /dev/null +++ b/tools/virtio/linux/compiler.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_COMPILER_H +#define LINUX_COMPILER_H + +#define WRITE_ONCE(var, val) \ + (*((volatile typeof(val) *)(&(var))) = (val)) + +#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var)))) + +#endif diff --git a/tools/virtio/linux/device.h b/tools/virtio/linux/device.h new file mode 100644 index 000000000..4ad7e1df0 --- /dev/null +++ b/tools/virtio/linux/device.h @@ -0,0 +1,2 @@ +#ifndef LINUX_DEVICE_H +#endif diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h new file mode 100644 index 000000000..f91aeb5fe --- /dev/null +++ b/tools/virtio/linux/dma-mapping.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_DMA_MAPPING_H +#define _LINUX_DMA_MAPPING_H + +#ifdef CONFIG_HAS_DMA +# error Virtio userspace code does not support CONFIG_HAS_DMA +#endif + +enum dma_data_direction { + DMA_BIDIRECTIONAL = 0, + DMA_TO_DEVICE = 1, + DMA_FROM_DEVICE = 2, + DMA_NONE = 3, +}; + +#define dma_alloc_coherent(d, s, hp, f) ({ \ + void *__dma_alloc_coherent_p = kmalloc((s), (f)); \ + *(hp) 
= (unsigned long)__dma_alloc_coherent_p; \ + __dma_alloc_coherent_p; \ +}) + +#define dma_free_coherent(d, s, p, h) kfree(p) + +#define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o)) + +#define dma_map_single(d, p, s, dir) (virt_to_phys(p)) +#define dma_mapping_error(...) (0) + +#define dma_unmap_single(...) do { } while (0) +#define dma_unmap_page(...) do { } while (0) + +#endif diff --git a/tools/virtio/linux/err.h b/tools/virtio/linux/err.h new file mode 100644 index 000000000..0943c644a --- /dev/null +++ b/tools/virtio/linux/err.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ERR_H +#define ERR_H +#define MAX_ERRNO 4095 + +#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO) + +static inline void * __must_check ERR_PTR(long error) +{ + return (void *) error; +} + +static inline long __must_check PTR_ERR(const void *ptr) +{ + return (long) ptr; +} + +static inline long __must_check IS_ERR(const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static inline long __must_check IS_ERR_OR_NULL(const void *ptr) +{ + return !ptr || IS_ERR_VALUE((unsigned long)ptr); +} +#endif /* ERR_H */ diff --git a/tools/virtio/linux/export.h b/tools/virtio/linux/export.h new file mode 100644 index 000000000..416875e29 --- /dev/null +++ b/tools/virtio/linux/export.h @@ -0,0 +1,3 @@ +#define EXPORT_SYMBOL_GPL(sym) extern typeof(sym) sym +#define EXPORT_SYMBOL(sym) extern typeof(sym) sym + diff --git a/tools/virtio/linux/hrtimer.h b/tools/virtio/linux/hrtimer.h new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tools/virtio/linux/hrtimer.h diff --git a/tools/virtio/linux/irqreturn.h b/tools/virtio/linux/irqreturn.h new file mode 100644 index 000000000..a3c4e7be7 --- /dev/null +++ b/tools/virtio/linux/irqreturn.h @@ -0,0 +1 @@ +#include "../../../include/linux/irqreturn.h" diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h new file mode 100644 index 000000000..7ef45a4a3 --- /dev/null +++ b/tools/virtio/linux/kernel.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef KERNEL_H +#define KERNEL_H +#include <stdbool.h> +#include <stdlib.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdarg.h> + +#include <linux/compiler.h> +#include <linux/types.h> +#include <linux/printk.h> +#include <linux/bug.h> +#include <errno.h> +#include <unistd.h> +#include <asm/barrier.h> + +#define CONFIG_SMP + +#define PAGE_SIZE getpagesize() +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK) + +/* generic data direction definitions */ +#define READ 0 +#define WRITE 1 + +typedef unsigned long long phys_addr_t; +typedef unsigned long long dma_addr_t; +typedef size_t __kernel_size_t; +typedef unsigned int __wsum; + +struct page { + unsigned long long dummy; +}; + +/* Physical == Virtual */ +#define virt_to_phys(p) ((unsigned long)p) +#define phys_to_virt(a) ((void *)(unsigned long)(a)) +/* Page address: Virtual / 4K */ +#define page_to_phys(p) ((dma_addr_t)(unsigned long)(p)) +#define virt_to_page(p) ((struct page *)((unsigned long)p & PAGE_MASK)) + +#define offset_in_page(p) (((unsigned long)p) % PAGE_SIZE) + +#define __printf(a,b) __attribute__((format(printf,a,b))) + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) + +extern void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; +static inline void *kmalloc(size_t s, gfp_t gfp) +{ + if (__kmalloc_fake) + return __kmalloc_fake; + return malloc(s); +} +static inline void 
*kmalloc_array(unsigned n, size_t s, gfp_t gfp) +{ + return kmalloc(n * s, gfp); +} + +static inline void *kzalloc(size_t s, gfp_t gfp) +{ + void *p = kmalloc(s, gfp); + + memset(p, 0, s); + return p; +} + +static inline void *alloc_pages_exact(size_t s, gfp_t gfp) +{ + return kmalloc(s, gfp); +} + +static inline void kfree(void *p) +{ + if (p >= __kfree_ignore_start && p < __kfree_ignore_end) + return; + free(p); +} + +static inline void free_pages_exact(void *p, size_t s) +{ + kfree(p); +} + +static inline void *krealloc(void *p, size_t s, gfp_t gfp) +{ + return realloc(p, s); +} + + +static inline unsigned long __get_free_page(gfp_t gfp) +{ + void *p; + + posix_memalign(&p, PAGE_SIZE, PAGE_SIZE); + return (unsigned long)p; +} + +static inline void free_page(unsigned long addr) +{ + free((void *)addr); +} + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +#define uninitialized_var(x) x = x + +# ifndef likely +# define likely(x) (__builtin_expect(!!(x), 1)) +# endif +# ifndef unlikely +# define unlikely(x) (__builtin_expect(!!(x), 0)) +# endif + +#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#ifdef DEBUG +#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#else +#define pr_debug(format, ...) do {} while (0) +#endif +#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) + +#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0) + +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +/* TODO: empty stubs for now. 
Broken but enough for virtio_ring.c */ +#define list_add_tail(a, b) do {} while (0) +#define list_del(a) do {} while (0) +#define list_for_each_entry(a, b, c) while (0) +/* end of stubs */ + +#endif /* KERNEL_H */ diff --git a/tools/virtio/linux/kmemleak.h b/tools/virtio/linux/kmemleak.h new file mode 100644 index 000000000..c07072270 --- /dev/null +++ b/tools/virtio/linux/kmemleak.h @@ -0,0 +1,3 @@ +static inline void kmemleak_ignore(const void *ptr) +{ +} diff --git a/tools/virtio/linux/module.h b/tools/virtio/linux/module.h new file mode 100644 index 000000000..9dfa96fea --- /dev/null +++ b/tools/virtio/linux/module.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> + +#define MODULE_LICENSE(__MODULE_LICENSE_value) \ + static __attribute__((unused)) const char *__MODULE_LICENSE_name = \ + __MODULE_LICENSE_value + diff --git a/tools/virtio/linux/printk.h b/tools/virtio/linux/printk.h new file mode 100644 index 000000000..9f2423bd8 --- /dev/null +++ b/tools/virtio/linux/printk.h @@ -0,0 +1,4 @@ +#include "../../../include/linux/kern_levels.h" + +#define printk printf +#define vprintk vprintf diff --git a/tools/virtio/linux/ratelimit.h b/tools/virtio/linux/ratelimit.h new file mode 100644 index 000000000..dcce1725f --- /dev/null +++ b/tools/virtio/linux/ratelimit.h @@ -0,0 +1,4 @@ +#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) int name = 0 + +#define __ratelimit(x) (*(x)) + diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h new file mode 100644 index 000000000..369ee308b --- /dev/null +++ b/tools/virtio/linux/scatterlist.h @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef SCATTERLIST_H +#define SCATTERLIST_H +#include <linux/kernel.h> + +struct scatterlist { + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; +}; + +/* Scatterlist helpers, stolen from linux/scatterlist.h */ +#define sg_is_chain(sg) ((sg)->page_link & 0x01) +#define sg_is_last(sg) ((sg)->page_link & 0x02) +#define sg_chain_ptr(sg) \ + ((struct scatterlist *) ((sg)->page_link & ~0x03)) + +/** + * sg_assign_page - Assign a given page to an SG entry + * @sg: SG entry + * @page: The page + * + * Description: + * Assign page to sg entry. Also see sg_set_page(), the most commonly used + * variant. + * + **/ +static inline void sg_assign_page(struct scatterlist *sg, struct page *page) +{ + unsigned long page_link = sg->page_link & 0x3; + + /* + * In order for the low bit stealing approach to work, pages + * must be aligned at a 32-bit boundary as a minimum. + */ + BUG_ON((unsigned long) page & 0x03); +#ifdef CONFIG_DEBUG_SG + BUG_ON(sg_is_chain(sg)); +#endif + sg->page_link = page_link | (unsigned long) page; +} + +/** + * sg_set_page - Set sg entry to point at given page + * @sg: SG entry + * @page: The page + * @len: Length of data + * @offset: Offset into page + * + * Description: + * Use this function to set an sg entry pointing at a page, never assign + * the page directly. We encode sg table information in the lower bits + * of the page pointer. See sg_page() for looking up the page belonging + * to an sg entry. 
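+ *
+ * For illustration, pointing an entry at a kmalloc'd buffer would
+ * look like:
+ *
+ *	sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+ *
+ * which is exactly what the sg_set_buf() helper further below does.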
+ * + **/ +static inline void sg_set_page(struct scatterlist *sg, struct page *page, + unsigned int len, unsigned int offset) +{ + sg_assign_page(sg, page); + sg->offset = offset; + sg->length = len; +} + +static inline struct page *sg_page(struct scatterlist *sg) +{ +#ifdef CONFIG_DEBUG_SG + BUG_ON(sg_is_chain(sg)); +#endif + return (struct page *)((sg)->page_link & ~0x3); +} + +/* + * Loop over each sg element, following the pointer to a new list if necessary + */ +#define for_each_sg(sglist, sg, nr, __i) \ + for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg)) + +/** + * sg_chain - Chain two sglists together + * @prv: First scatterlist + * @prv_nents: Number of entries in prv + * @sgl: Second scatterlist + * + * Description: + * Links @prv@ and @sgl@ together, to form a longer scatterlist. + * + **/ +static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, + struct scatterlist *sgl) +{ + /* + * offset and length are unused for chain entry. Clear them. + */ + prv[prv_nents - 1].offset = 0; + prv[prv_nents - 1].length = 0; + + /* + * Set lowest bit to indicate a link pointer, and make sure to clear + * the termination bit if it happens to be set. + */ + prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02; +} + +/** + * sg_mark_end - Mark the end of the scatterlist + * @sg: SG entryScatterlist + * + * Description: + * Marks the passed in sg entry as the termination point for the sg + * table. A call to sg_next() on this entry will return NULL. + * + **/ +static inline void sg_mark_end(struct scatterlist *sg) +{ + /* + * Set termination bit, clear potential chain bit + */ + sg->page_link |= 0x02; + sg->page_link &= ~0x01; +} + +/** + * sg_unmark_end - Undo setting the end of the scatterlist + * @sg: SG entryScatterlist + * + * Description: + * Removes the termination marker from the given entry of the scatterlist. 
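+ *
+ * For illustration, growing a table that has room for one more entry
+ * means moving the terminator:
+ *
+ *	sg_unmark_end(&sgl[nents - 1]);
+ *	sg_mark_end(&sgl[nents]);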
+ * + **/ +static inline void sg_unmark_end(struct scatterlist *sg) +{ + sg->page_link &= ~0x02; +} + +static inline struct scatterlist *sg_next(struct scatterlist *sg) +{ + if (sg_is_last(sg)) + return NULL; + + sg++; + if (unlikely(sg_is_chain(sg))) + sg = sg_chain_ptr(sg); + + return sg; +} + +static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) +{ + memset(sgl, 0, sizeof(*sgl) * nents); + sg_mark_end(&sgl[nents - 1]); +} + +static inline dma_addr_t sg_phys(struct scatterlist *sg) +{ + return page_to_phys(sg_page(sg)) + sg->offset; +} + +static inline void sg_set_buf(struct scatterlist *sg, const void *buf, + unsigned int buflen) +{ + sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); +} + +static inline void sg_init_one(struct scatterlist *sg, + const void *buf, unsigned int buflen) +{ + sg_init_table(sg, 1); + sg_set_buf(sg, buf, buflen); +} +#endif /* SCATTERLIST_H */ diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h new file mode 100644 index 000000000..319dcaa07 --- /dev/null +++ b/tools/virtio/linux/slab.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_SLAB_H +#define GFP_KERNEL 0 +#define GFP_ATOMIC 0 +#define __GFP_NOWARN 0 +#define __GFP_ZERO 0 +#endif diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h new file mode 100644 index 000000000..e0f610d08 --- /dev/null +++ b/tools/virtio/linux/thread_info.h @@ -0,0 +1 @@ +#define check_copy_size(A, B, C) (1) diff --git a/tools/virtio/linux/uaccess.h b/tools/virtio/linux/uaccess.h new file mode 100644 index 000000000..991dfb263 --- /dev/null +++ b/tools/virtio/linux/uaccess.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef UACCESS_H +#define UACCESS_H + +#include <linux/compiler.h> + +extern void *__user_addr_min, *__user_addr_max; + +static inline void __chk_user_ptr(const volatile void *p, size_t size) +{ + assert(p >= __user_addr_min && p + size <= __user_addr_max); +} + +#define put_user(x, ptr) \ +({ \ + typeof(ptr) __pu_ptr = (ptr); \ + __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \ + WRITE_ONCE(*(__pu_ptr), x); \ + 0; \ +}) + +#define get_user(x, ptr) \ +({ \ + typeof(ptr) __pu_ptr = (ptr); \ + __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \ + x = READ_ONCE(*(__pu_ptr)); \ + 0; \ +}) + +static void volatile_memcpy(volatile char *to, const volatile char *from, + unsigned long n) +{ + while (n--) + *(to++) = *(from++); +} + +static inline int copy_from_user(void *to, const void __user volatile *from, + unsigned long n) +{ + __chk_user_ptr(from, n); + volatile_memcpy(to, from, n); + return 0; +} + +static inline int copy_to_user(void __user volatile *to, const void *from, + unsigned long n) +{ + __chk_user_ptr(to, n); + volatile_memcpy(to, from, n); + return 0; +} +#endif /* UACCESS_H */ diff --git a/tools/virtio/linux/uio.h b/tools/virtio/linux/uio.h new file mode 100644 index 000000000..cd20f0ba3 --- /dev/null +++ b/tools/virtio/linux/uio.h @@ -0,0 +1,3 @@ +#include <linux/kernel.h> + +#include "../../../include/linux/uio.h" diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h new file mode 100644 index 000000000..b751350d4 --- /dev/null +++ b/tools/virtio/linux/virtio.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_VIRTIO_H +#define LINUX_VIRTIO_H +#include <linux/scatterlist.h> +#include <linux/kernel.h> + +struct device { + void *parent; +}; + +struct virtio_device { + struct device dev; + u64 features; +}; + +struct virtqueue { + /* TODO: commented as list 
macros are empty stubs for now. + * Broken but enough for virtio_ring.c + * struct list_head list; */ + void (*callback)(struct virtqueue *vq); + const char *name; + struct virtio_device *vdev; + unsigned int index; + unsigned int num_free; + void *priv; +}; + +/* Interfaces exported by virtio_ring. */ +int virtqueue_add_sgs(struct virtqueue *vq, + struct scatterlist *sgs[], + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp); + +int virtqueue_add_outbuf(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + gfp_t gfp); + +int virtqueue_add_inbuf(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + gfp_t gfp); + +bool virtqueue_kick(struct virtqueue *vq); + +void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); + +void virtqueue_disable_cb(struct virtqueue *vq); + +bool virtqueue_enable_cb(struct virtqueue *vq); +bool virtqueue_enable_cb_delayed(struct virtqueue *vq); + +void *virtqueue_detach_unused_buf(struct virtqueue *vq); +struct virtqueue *vring_new_virtqueue(unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool ctx, + void *pages, + bool (*notify)(struct virtqueue *vq), + void (*callback)(struct virtqueue *vq), + const char *name); +void vring_del_virtqueue(struct virtqueue *vq); + +#endif diff --git a/tools/virtio/linux/virtio_byteorder.h b/tools/virtio/linux/virtio_byteorder.h new file mode 100644 index 000000000..5b50f7eeb --- /dev/null +++ b/tools/virtio/linux/virtio_byteorder.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_BYTEORDER_STUB_H +#define _LINUX_VIRTIO_BYTEORDER_STUB_H + +#include <asm/byteorder.h> +#include "../../include/linux/byteorder/generic.h" +#include "../../include/linux/virtio_byteorder.h" + +#endif diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h new file mode 100644 index 000000000..dbf14c1e2 --- /dev/null +++ b/tools/virtio/linux/virtio_config.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/virtio_byteorder.h> +#include <linux/virtio.h> +#include <uapi/linux/virtio_config.h> + +/* + * __virtio_test_bit - helper to test feature bits. For use by transports. + * Devices should normally use virtio_has_feature, + * which includes more checks. + * @vdev: the device + * @fbit: the feature bit + */ +static inline bool __virtio_test_bit(const struct virtio_device *vdev, + unsigned int fbit) +{ + return vdev->features & (1ULL << fbit); +} + +/** + * __virtio_set_bit - helper to set feature bits. For use by transports. + * @vdev: the device + * @fbit: the feature bit + */ +static inline void __virtio_set_bit(struct virtio_device *vdev, + unsigned int fbit) +{ + vdev->features |= (1ULL << fbit); +} + +/** + * __virtio_clear_bit - helper to clear feature bits. For use by transports. + * @vdev: the device + * @fbit: the feature bit + */ +static inline void __virtio_clear_bit(struct virtio_device *vdev, + unsigned int fbit) +{ + vdev->features &= ~(1ULL << fbit); +} + +#define virtio_has_feature(dev, feature) \ + (__virtio_test_bit((dev), feature)) + +/** + * virtio_has_iommu_quirk - determine whether this device has the iommu quirk + * @vdev: the device + */ +static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) +{ + /* + * Note the reverse polarity of the quirk feature (compared to most + * other features), this is for compatibility with legacy systems. 
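+ *
+ * For illustration: with VIRTIO_F_IOMMU_PLATFORM negotiated this
+ * returns false and the device must be assumed to sit behind the
+ * platform IOMMU; without the bit (legacy behaviour) it returns true
+ * and physical addresses are used directly.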
+ */ + return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +} + +static inline bool virtio_is_little_endian(struct virtio_device *vdev) +{ + return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) || + virtio_legacy_is_little_endian(); +} + +/* Memory accessors */ +static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val) +{ + return __virtio16_to_cpu(virtio_is_little_endian(vdev), val); +} + +static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val) +{ + return __cpu_to_virtio16(virtio_is_little_endian(vdev), val); +} + +static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val) +{ + return __virtio32_to_cpu(virtio_is_little_endian(vdev), val); +} + +static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val) +{ + return __cpu_to_virtio32(virtio_is_little_endian(vdev), val); +} + +static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val) +{ + return __virtio64_to_cpu(virtio_is_little_endian(vdev), val); +} + +static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) +{ + return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); +} diff --git a/tools/virtio/linux/virtio_ring.h b/tools/virtio/linux/virtio_ring.h new file mode 100644 index 000000000..8949c4e27 --- /dev/null +++ b/tools/virtio/linux/virtio_ring.h @@ -0,0 +1 @@ +#include "../../../include/linux/virtio_ring.h" diff --git a/tools/virtio/linux/vringh.h b/tools/virtio/linux/vringh.h new file mode 100644 index 000000000..9348957be --- /dev/null +++ b/tools/virtio/linux/vringh.h @@ -0,0 +1 @@ +#include "../../../include/linux/vringh.h" diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile new file mode 100644 index 000000000..85c98c281 --- /dev/null +++ b/tools/virtio/ringtest/Makefile @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 +all: + +all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring noring + +CFLAGS += -Wall +CFLAGS += -pthread -O2 -ggdb -flto -fwhole-program +LDFLAGS += -pthread -O2 -ggdb -flto -fwhole-program + +main.o: main.c main.h +ring.o: ring.c main.h +ptr_ring.o: ptr_ring.c main.h ../../../include/linux/ptr_ring.h +virtio_ring_0_9.o: virtio_ring_0_9.c main.h +virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h +virtio_ring_inorder.o: virtio_ring_inorder.c virtio_ring_0_9.c main.h +ring: ring.o main.o +virtio_ring_0_9: virtio_ring_0_9.o main.o +virtio_ring_poll: virtio_ring_poll.o main.o +virtio_ring_inorder: virtio_ring_inorder.o main.o +ptr_ring: ptr_ring.o main.o +noring: noring.o main.o +clean: + -rm main.o + -rm ring.o ring + -rm virtio_ring_0_9.o virtio_ring_0_9 + -rm virtio_ring_poll.o virtio_ring_poll + -rm virtio_ring_inorder.o virtio_ring_inorder + -rm ptr_ring.o ptr_ring + -rm noring.o noring + +.PHONY: all clean diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README new file mode 100644 index 000000000..d83707a33 --- /dev/null +++ b/tools/virtio/ringtest/README @@ -0,0 +1,6 @@ +Partial implementation of various ring layouts, useful to tune virtio design. +Uses shared memory heavily. + +Typical use: + +# sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c new file mode 100644 index 000000000..453ca3c21 --- /dev/null +++ b/tools/virtio/ringtest/main.c @@ -0,0 +1,391 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. 
+ * + * Command line processing and common functions for ring benchmarking. + */ +#define _GNU_SOURCE +#include <getopt.h> +#include <pthread.h> +#include <assert.h> +#include <sched.h> +#include "main.h" +#include <sys/eventfd.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <limits.h> + +int runcycles = 10000000; +int max_outstanding = INT_MAX; +int batch = 1; +int param = 0; + +bool do_sleep = false; +bool do_relax = false; +bool do_exit = true; + +unsigned ring_size = 256; + +static int kickfd = -1; +static int callfd = -1; + +void notify(int fd) +{ + unsigned long long v = 1; + int r; + + vmexit(); + r = write(fd, &v, sizeof v); + assert(r == sizeof v); + vmentry(); +} + +void wait_for_notify(int fd) +{ + unsigned long long v = 1; + int r; + + vmexit(); + r = read(fd, &v, sizeof v); + assert(r == sizeof v); + vmentry(); +} + +void kick(void) +{ + notify(kickfd); +} + +void wait_for_kick(void) +{ + wait_for_notify(kickfd); +} + +void call(void) +{ + notify(callfd); +} + +void wait_for_call(void) +{ + wait_for_notify(callfd); +} + +void set_affinity(const char *arg) +{ + cpu_set_t cpuset; + int ret; + pthread_t self; + long int cpu; + char *endptr; + + if (!arg) + return; + + cpu = strtol(arg, &endptr, 0); + assert(!*endptr); + + assert(cpu >= 0 && cpu < CPU_SETSIZE); + + self = pthread_self(); + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset); + assert(!ret); +} + +void poll_used(void) +{ + while (used_empty()) + busy_wait(); +} + +static void __attribute__((__flatten__)) run_guest(void) +{ + int completed_before; + int completed = 0; + int started = 0; + int bufs = runcycles; + int spurious = 0; + int r; + unsigned len; + void *buf; + int tokick = batch; + + for (;;) { + if (do_sleep) + disable_call(); + completed_before = completed; + do { + if (started < bufs && + started - completed < max_outstanding) { + r = add_inbuf(0, "Buffer\n", "Hello, world!"); + if (__builtin_expect(r == 0, true)) { + ++started; + if (!--tokick) { + tokick = batch; + if (do_sleep) + kick_available(); + } + + } + } else + r = -1; + + /* Flush out completed bufs if any */ + if (get_buf(&len, &buf)) { + ++completed; + if (__builtin_expect(completed == bufs, false)) + return; + r = 0; + } + } while (r == 0); + if (completed == completed_before) + ++spurious; + assert(completed <= bufs); + assert(started <= bufs); + if (do_sleep) { + if (used_empty() && enable_call()) + wait_for_call(); + } else { + poll_used(); + } + } +} + +void poll_avail(void) +{ + while (avail_empty()) + busy_wait(); +} + +static void __attribute__((__flatten__)) run_host(void) +{ + int completed_before; + int completed = 0; + int spurious = 0; + int bufs = runcycles; + unsigned len; + void *buf; + + for (;;) { + if (do_sleep) { + if (avail_empty() && enable_kick()) + wait_for_kick(); + } else { + poll_avail(); + } + if (do_sleep) + disable_kick(); + completed_before = completed; + while (__builtin_expect(use_buf(&len, &buf), true)) { + if (do_sleep) + call_used(); + ++completed; + if (__builtin_expect(completed == bufs, false)) + return; + } + if (completed == completed_before) + ++spurious; + assert(completed <= bufs); + if (completed == bufs) + break; + } +} + +void *start_guest(void *arg) +{ + set_affinity(arg); + run_guest(); + pthread_exit(NULL); +} + +void *start_host(void *arg) +{ + set_affinity(arg); + run_host(); + pthread_exit(NULL); +} + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "help", + 
.has_arg = no_argument, + .val = 'h', + }, + { + .name = "host-affinity", + .has_arg = required_argument, + .val = 'H', + }, + { + .name = "guest-affinity", + .has_arg = required_argument, + .val = 'G', + }, + { + .name = "ring-size", + .has_arg = required_argument, + .val = 'R', + }, + { + .name = "run-cycles", + .has_arg = required_argument, + .val = 'C', + }, + { + .name = "outstanding", + .has_arg = required_argument, + .val = 'o', + }, + { + .name = "batch", + .has_arg = required_argument, + .val = 'b', + }, + { + .name = "param", + .has_arg = required_argument, + .val = 'p', + }, + { + .name = "sleep", + .has_arg = no_argument, + .val = 's', + }, + { + .name = "relax", + .has_arg = no_argument, + .val = 'x', + }, + { + .name = "exit", + .has_arg = no_argument, + .val = 'e', + }, + { + } +}; + +static void help(void) +{ + fprintf(stderr, "Usage: <test> [--help]" + " [--host-affinity H]" + " [--guest-affinity G]" + " [--ring-size R (default: %d)]" + " [--run-cycles C (default: %d)]" + " [--batch b]" + " [--outstanding o]" + " [--param p]" + " [--sleep]" + " [--relax]" + " [--exit]" + "\n", + ring_size, + runcycles); +} + +int main(int argc, char **argv) +{ + int ret; + pthread_t host, guest; + void *tret; + char *host_arg = NULL; + char *guest_arg = NULL; + char *endptr; + long int c; + + kickfd = eventfd(0, 0); + assert(kickfd >= 0); + callfd = eventfd(0, 0); + assert(callfd >= 0); + + for (;;) { + int o = getopt_long(argc, argv, optstring, longopts, NULL); + switch (o) { + case -1: + goto done; + case '?': + help(); + exit(2); + case 'H': + host_arg = optarg; + break; + case 'G': + guest_arg = optarg; + break; + case 'R': + ring_size = strtol(optarg, &endptr, 0); + assert(ring_size && !(ring_size & (ring_size - 1))); + assert(!*endptr); + break; + case 'C': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + runcycles = c; + break; + case 'o': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + max_outstanding = c; + break; + case 'p': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + param = c; + break; + case 'b': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + batch = c; + break; + case 's': + do_sleep = true; + break; + case 'x': + do_relax = true; + break; + case 'e': + do_exit = true; + break; + default: + help(); + exit(4); + break; + } + } + + /* does nothing here, used to make sure all smp APIs compile */ + smp_acquire(); + smp_release(); + smp_mb(); +done: + + if (batch > max_outstanding) + batch = max_outstanding; + + if (optind < argc) { + help(); + exit(4); + } + alloc_ring(); + + ret = pthread_create(&host, NULL, start_host, host_arg); + assert(!ret); + ret = pthread_create(&guest, NULL, start_guest, guest_arg); + assert(!ret); + + ret = pthread_join(guest, &tret); + assert(!ret); + ret = pthread_join(host, &tret); + assert(!ret); + return 0; +} diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h new file mode 100644 index 000000000..301d59bfc --- /dev/null +++ b/tools/virtio/ringtest/main.h @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Common macros and functions for ring benchmarking. 
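+ *
+ * Each ring implementation supplies the alloc_ring/add_inbuf/get_buf/
+ * use_buf hooks declared below; main.c drives them from one guest
+ * thread and one host thread.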
+ */ +#ifndef MAIN_H +#define MAIN_H + +#include <stdbool.h> + +extern int param; + +extern bool do_exit; + +#if defined(__x86_64__) || defined(__i386__) +#include "x86intrin.h" + +static inline void wait_cycles(unsigned long long cycles) +{ + unsigned long long t; + + t = __rdtsc(); + while (__rdtsc() - t < cycles) {} +} + +#define VMEXIT_CYCLES 500 +#define VMENTRY_CYCLES 500 + +#elif defined(__s390x__) +static inline void wait_cycles(unsigned long long cycles) +{ + asm volatile("0: brctg %0,0b" : : "d" (cycles)); +} + +/* tweak me */ +#define VMEXIT_CYCLES 200 +#define VMENTRY_CYCLES 200 + +#else +static inline void wait_cycles(unsigned long long cycles) +{ + _Exit(5); +} +#define VMEXIT_CYCLES 0 +#define VMENTRY_CYCLES 0 +#endif + +static inline void vmexit(void) +{ + if (!do_exit) + return; + + wait_cycles(VMEXIT_CYCLES); +} +static inline void vmentry(void) +{ + if (!do_exit) + return; + + wait_cycles(VMENTRY_CYCLES); +} + +/* implemented by ring */ +void alloc_ring(void); +/* guest side */ +int add_inbuf(unsigned, void *, void *); +void *get_buf(unsigned *, void **); +void disable_call(); +bool used_empty(); +bool enable_call(); +void kick_available(); +/* host side */ +void disable_kick(); +bool avail_empty(); +bool enable_kick(); +bool use_buf(unsigned *, void **); +void call_used(); + +/* implemented by main */ +extern bool do_sleep; +void kick(void); +void wait_for_kick(void); +void call(void); +void wait_for_call(void); + +extern unsigned ring_size; + +/* Compiler barrier - similar to what Linux uses */ +#define barrier() asm volatile("" ::: "memory") + +/* Is there a portable way to do this? */ +#if defined(__x86_64__) || defined(__i386__) +#define cpu_relax() asm ("rep; nop" ::: "memory") +#elif defined(__s390x__) +#define cpu_relax() barrier() +#else +#define cpu_relax() assert(0) +#endif + +extern bool do_relax; + +static inline void busy_wait(void) +{ + if (do_relax) + cpu_relax(); + else + /* prevent compiler from removing busy loops */ + barrier(); +} + +#if defined(__x86_64__) || defined(__i386__) +#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") +#else +/* + * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized + * with other __ATOMIC_SEQ_CST calls. + */ +#define smp_mb() __sync_synchronize() +#endif + +/* + * This abuses the atomic builtins for thread fences, and + * adds a compiler barrier. 
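+ *
+ * Illustrative pairing, as used by the ring implementations:
+ *	producer: fill descriptor;  smp_release(); publish index/flag
+ *	consumer: read index/flag;  smp_acquire(); read descriptor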
+ */ +#define smp_release() do { \ + barrier(); \ + __atomic_thread_fence(__ATOMIC_RELEASE); \ +} while (0) + +#define smp_acquire() do { \ + __atomic_thread_fence(__ATOMIC_ACQUIRE); \ + barrier(); \ +} while (0) + +#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) +#define smp_wmb() barrier() +#else +#define smp_wmb() smp_release() +#endif + +#ifdef __alpha__ +#define smp_read_barrier_depends() smp_acquire() +#else +#define smp_read_barrier_depends() do {} while(0) +#endif + +static __always_inline +void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { \ + case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \ + case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \ + case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \ + case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \ + default: \ + barrier(); \ + __builtin_memcpy((void *)res, (const void *)p, size); \ + barrier(); \ + } \ +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break; + case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break; + case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break; + case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ + __u.__val; \ +}) + +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (typeof(x)) (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) + +#endif diff --git a/tools/virtio/ringtest/noring.c b/tools/virtio/ringtest/noring.c new file mode 100644 index 000000000..ce2440d5c --- /dev/null +++ b/tools/virtio/ringtest/noring.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include "main.h" +#include <assert.h> + +/* stub implementation: useful for measuring overhead */ +void alloc_ring(void) +{ +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + return 0; +} + +/* + * skb_array API provides no way for producer to find out whether a given + * buffer was consumed. Our tests merely require that a successful get_buf + * implies that add_inbuf succeed in the past, and that add_inbuf will succeed, + * fake it accordingly. 
+ */ +void *get_buf(unsigned *lenp, void **bufp) +{ + return "Buffer"; +} + +bool used_empty() +{ + return false; +} + +void disable_call() +{ + assert(0); +} + +bool enable_call() +{ + assert(0); +} + +void kick_available(void) +{ + assert(0); +} + +/* host side */ +void disable_kick() +{ + assert(0); +} + +bool enable_kick() +{ + assert(0); +} + +bool avail_empty() +{ + return false; +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + return true; +} + +void call_used(void) +{ + assert(0); +} diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c new file mode 100644 index 000000000..2d566fbd2 --- /dev/null +++ b/tools/virtio/ringtest/ptr_ring.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include "main.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <malloc.h> +#include <assert.h> +#include <errno.h> +#include <limits.h> + +#define SMP_CACHE_BYTES 64 +#define cache_line_size() SMP_CACHE_BYTES +#define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES))) +#define unlikely(x) (__builtin_expect(!!(x), 0)) +#define likely(x) (__builtin_expect(!!(x), 1)) +#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a)) +#define SIZE_MAX (~(size_t)0) +#define KMALLOC_MAX_SIZE SIZE_MAX +#define BUG_ON(x) assert(x) + +typedef pthread_spinlock_t spinlock_t; + +typedef int gfp_t; +#define __GFP_ZERO 0x1 + +static void *kmalloc(unsigned size, gfp_t gfp) +{ + void *p = memalign(64, size); + if (!p) + return p; + + if (gfp & __GFP_ZERO) + memset(p, 0, size); + return p; +} + +static inline void *kzalloc(unsigned size, gfp_t flags) +{ + return kmalloc(size, flags | __GFP_ZERO); +} + +static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) +{ + if (size != 0 && n > SIZE_MAX / size) + return NULL; + return kmalloc(n * size, flags); +} + +static inline void *kcalloc(size_t n, size_t size, gfp_t flags) +{ + return kmalloc_array(n, size, flags | __GFP_ZERO); +} + +static void kfree(void *p) +{ + if (p) + free(p); +} + +#define kvmalloc_array kmalloc_array +#define kvfree kfree + +static void spin_lock_init(spinlock_t *lock) +{ + int r = pthread_spin_init(lock, 0); + assert(!r); +} + +static void spin_lock(spinlock_t *lock) +{ + int ret = pthread_spin_lock(lock); + assert(!ret); +} + +static void spin_unlock(spinlock_t *lock) +{ + int ret = pthread_spin_unlock(lock); + assert(!ret); +} + +static void spin_lock_bh(spinlock_t *lock) +{ + spin_lock(lock); +} + +static void spin_unlock_bh(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static void spin_lock_irq(spinlock_t *lock) +{ + spin_lock(lock); +} + +static void spin_unlock_irq(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static void spin_lock_irqsave(spinlock_t *lock, unsigned long f) +{ + spin_lock(lock); +} + +static void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f) +{ + spin_unlock(lock); +} + +#include "../../../include/linux/ptr_ring.h" + +static unsigned long long headcnt, tailcnt; +static struct ptr_ring array ____cacheline_aligned_in_smp; + +/* implemented by ring */ +void alloc_ring(void) +{ + int ret = ptr_ring_init(&array, ring_size, 0); + assert(!ret); + /* Hacky way to poke at ring internals. Useful for testing though. 
*/ + if (param) + array.batch = param; +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + int ret; + + ret = __ptr_ring_produce(&array, buf); + if (ret >= 0) { + ret = 0; + headcnt++; + } + + return ret; +} + +/* + * ptr_ring API provides no way for producer to find out whether a given + * buffer was consumed. Our tests merely require that a successful get_buf + * implies that add_inbuf succeed in the past, and that add_inbuf will succeed, + * fake it accordingly. + */ +void *get_buf(unsigned *lenp, void **bufp) +{ + void *datap; + + if (tailcnt == headcnt || __ptr_ring_full(&array)) + datap = NULL; + else { + datap = "Buffer\n"; + ++tailcnt; + } + + return datap; +} + +bool used_empty() +{ + return (tailcnt == headcnt || __ptr_ring_full(&array)); +} + +void disable_call() +{ + assert(0); +} + +bool enable_call() +{ + assert(0); +} + +void kick_available(void) +{ + assert(0); +} + +/* host side */ +void disable_kick() +{ + assert(0); +} + +bool enable_kick() +{ + assert(0); +} + +bool avail_empty() +{ + return __ptr_ring_empty(&array); +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + void *ptr; + + ptr = __ptr_ring_consume(&array); + + return ptr; +} + +void call_used(void) +{ + assert(0); +} diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c new file mode 100644 index 000000000..5a41404aa --- /dev/null +++ b/tools/virtio/ringtest/ring.c @@ -0,0 +1,270 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Simple descriptor-based ring. virtio 0.9 compatible event index is used for + * signalling, unconditionally. + */ +#define _GNU_SOURCE +#include "main.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +/* Next - Where next entry will be written. + * Prev - "Next" value when event triggered previously. + * Event - Peer requested event after writing this entry. + */ +static inline bool need_event(unsigned short event, + unsigned short next, + unsigned short prev) +{ + return (unsigned short)(next - event - 1) < (unsigned short)(next - prev); +} + +/* Design: + * Guest adds descriptors with unique index values and DESC_HW in flags. + * Host overwrites used descriptors with correct len, index, and DESC_HW clear. + * Flags are always set last. + */ +#define DESC_HW 0x1 + +struct desc { + unsigned short flags; + unsigned short index; + unsigned len; + unsigned long long addr; +}; + +/* how much padding is needed to avoid false cache sharing */ +#define HOST_GUEST_PADDING 0x80 + +/* Mostly read */ +struct event { + unsigned short kick_index; + unsigned char reserved0[HOST_GUEST_PADDING - 2]; + unsigned short call_index; + unsigned char reserved1[HOST_GUEST_PADDING - 2]; +}; + +struct data { + void *buf; /* descriptor is writeable, we can't get buf from there */ + void *data; +} *data; + +struct desc *ring; +struct event *event; + +struct guest { + unsigned avail_idx; + unsigned last_used_idx; + unsigned num_free; + unsigned kicked_avail_idx; + unsigned char reserved[HOST_GUEST_PADDING - 12]; +} guest; + +struct host { + /* we do not need to track last avail index + * unless we have more than one in flight. 
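+ * (Completion in this benchmark is strictly in order, so used_idx
+ * alone identifies the next descriptor to complete.)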
+ */ + unsigned used_idx; + unsigned called_used_idx; + unsigned char reserved[HOST_GUEST_PADDING - 4]; +} host; + +/* implemented by ring */ +void alloc_ring(void) +{ + int ret; + int i; + + ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring); + if (ret) { + perror("Unable to allocate ring buffer.\n"); + exit(3); + } + event = calloc(1, sizeof(*event)); + if (!event) { + perror("Unable to allocate event buffer.\n"); + exit(3); + } + guest.avail_idx = 0; + guest.kicked_avail_idx = -1; + guest.last_used_idx = 0; + host.used_idx = 0; + host.called_used_idx = -1; + for (i = 0; i < ring_size; ++i) { + struct desc desc = { + .index = i, + }; + ring[i] = desc; + } + guest.num_free = ring_size; + data = calloc(ring_size, sizeof(*data)); + if (!data) { + perror("Unable to allocate data buffer.\n"); + exit(3); + } +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + unsigned head, index; + + if (!guest.num_free) + return -1; + + guest.num_free--; + head = (ring_size - 1) & (guest.avail_idx++); + + /* Start with a write. On MESI architectures this helps + * avoid a shared state with consumer that is polling this descriptor. + */ + ring[head].addr = (unsigned long)(void*)buf; + ring[head].len = len; + /* read below might bypass write above. That is OK because it's just an + * optimization. If this happens, we will get the cache line in a + * shared state which is unfortunate, but probably not worth it to + * add an explicit full barrier to avoid this. + */ + barrier(); + index = ring[head].index; + data[index].buf = buf; + data[index].data = datap; + /* Barrier A (for pairing) */ + smp_release(); + ring[head].flags = DESC_HW; + + return 0; +} + +void *get_buf(unsigned *lenp, void **bufp) +{ + unsigned head = (ring_size - 1) & guest.last_used_idx; + unsigned index; + void *datap; + + if (ring[head].flags & DESC_HW) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); + *lenp = ring[head].len; + index = ring[head].index & (ring_size - 1); + datap = data[index].data; + *bufp = data[index].buf; + data[index].buf = NULL; + data[index].data = NULL; + guest.num_free++; + guest.last_used_idx++; + return datap; +} + +bool used_empty() +{ + unsigned head = (ring_size - 1) & guest.last_used_idx; + + return (ring[head].flags & DESC_HW); +} + +void disable_call() +{ + /* Doing nothing to disable calls might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_call() +{ + event->call_index = guest.last_used_idx; + /* Flush call index write */ + /* Barrier D (for pairing) */ + smp_mb(); + return used_empty(); +} + +void kick_available(void) +{ + bool need; + + /* Flush in previous flags write */ + /* Barrier C (for pairing) */ + smp_mb(); + need = need_event(event->kick_index, + guest.avail_idx, + guest.kicked_avail_idx); + + guest.kicked_avail_idx = guest.avail_idx; + if (need) + kick(); +} + +/* host side */ +void disable_kick() +{ + /* Doing nothing to disable kicks might cause + * extra interrupts, but reduces the number of cache misses. 
+ */ +} + +bool enable_kick() +{ + event->kick_index = host.used_idx; + /* Barrier C (for pairing) */ + smp_mb(); + return avail_empty(); +} + +bool avail_empty() +{ + unsigned head = (ring_size - 1) & host.used_idx; + + return !(ring[head].flags & DESC_HW); +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + unsigned head = (ring_size - 1) & host.used_idx; + + if (!(ring[head].flags & DESC_HW)) + return false; + + /* make sure length read below is not speculated */ + /* Barrier A (for pairing) */ + smp_acquire(); + + /* simple in-order completion: we don't need + * to touch index at all. This also means we + * can just modify the descriptor in-place. + */ + ring[head].len--; + /* Make sure len is valid before flags. + * Note: alternative is to write len and flags in one access - + * possible on 64 bit architectures but wmb is free on Intel anyway + * so I have no way to test whether it's a gain. + */ + /* Barrier B (for pairing) */ + smp_release(); + ring[head].flags = 0; + host.used_idx++; + return true; +} + +void call_used(void) +{ + bool need; + + /* Flush in previous flags write */ + /* Barrier D (for pairing) */ + smp_mb(); + + need = need_event(event->call_index, + host.used_idx, + host.called_used_idx); + + host.called_used_idx = host.used_idx; + + if (need) + call(); +} diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh new file mode 100755 index 000000000..dcc3ea758 --- /dev/null +++ b/tools/virtio/ringtest/run-on-all.sh @@ -0,0 +1,26 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +CPUS_ONLINE=$(lscpu --online -p=cpu|grep -v -e '#') +#use last CPU for host. Why not the first? +#many devices tend to use cpu0 by default so +#it tends to be busier +HOST_AFFINITY=$(echo "${CPUS_ONLINE}"|tail -n 1) + +#run command on all cpus +for cpu in $CPUS_ONLINE +do + #Don't run guest and host on same CPU + #It actually works ok if using signalling + if + (echo "$@" | grep -e "--sleep" > /dev/null) || \ + test $HOST_AFFINITY '!=' $cpu + then + echo "GUEST AFFINITY $cpu" + "$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu + fi +done +echo "NO GUEST AFFINITY" +"$@" --host-affinity $HOST_AFFINITY +echo "NO AFFINITY" +"$@" diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c new file mode 100644 index 000000000..5fd3fbcb9 --- /dev/null +++ b/tools/virtio/ringtest/virtio_ring_0_9.c @@ -0,0 +1,333 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Partial implementation of virtio 0.9. event index is used for signalling, + * unconditionally. Design roughly follows linux kernel implementation in order + * to be able to judge its performance. + */ +#define _GNU_SOURCE +#include "main.h" +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <string.h> +#include <linux/virtio_ring.h> + +struct data { + void *data; +} *data; + +struct vring ring; + +/* enabling the below activates experimental ring polling code + * (which skips index reads on consumer in favor of looking at + * high bits of ring id ^ 0x8000). + */ +/* #ifdef RING_POLL */ +/* enabling the below activates experimental in-order code + * (which skips ring updates and reads and writes len in descriptor). 
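+ *
+ * Neither macro is defined in this file; virtio_ring_poll.c and
+ * virtio_ring_inorder.c define RING_POLL and INORDER respectively
+ * and then #include this file.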
+ */ +/* #ifdef INORDER */ + +#if defined(RING_POLL) && defined(INORDER) +#error "RING_POLL and INORDER are mutually exclusive" +#endif + +/* how much padding is needed to avoid false cache sharing */ +#define HOST_GUEST_PADDING 0x80 + +struct guest { + unsigned short avail_idx; + unsigned short last_used_idx; + unsigned short num_free; + unsigned short kicked_avail_idx; +#ifndef INORDER + unsigned short free_head; +#else + unsigned short reserved_free_head; +#endif + unsigned char reserved[HOST_GUEST_PADDING - 10]; +} guest; + +struct host { + /* we do not need to track last avail index + * unless we have more than one in flight. + */ + unsigned short used_idx; + unsigned short called_used_idx; + unsigned char reserved[HOST_GUEST_PADDING - 4]; +} host; + +/* implemented by ring */ +void alloc_ring(void) +{ + int ret; + int i; + void *p; + + ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000)); + if (ret) { + perror("Unable to allocate ring buffer.\n"); + exit(3); + } + memset(p, 0, vring_size(ring_size, 0x1000)); + vring_init(&ring, ring_size, p, 0x1000); + + guest.avail_idx = 0; + guest.kicked_avail_idx = -1; + guest.last_used_idx = 0; +#ifndef INORDER + /* Put everything in free lists. */ + guest.free_head = 0; +#endif + for (i = 0; i < ring_size - 1; i++) + ring.desc[i].next = i + 1; + host.used_idx = 0; + host.called_used_idx = -1; + guest.num_free = ring_size; + data = malloc(ring_size * sizeof *data); + if (!data) { + perror("Unable to allocate data buffer.\n"); + exit(3); + } + memset(data, 0, ring_size * sizeof *data); +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + unsigned head; +#ifndef INORDER + unsigned avail; +#endif + struct vring_desc *desc; + + if (!guest.num_free) + return -1; + +#ifdef INORDER + head = (ring_size - 1) & (guest.avail_idx++); +#else + head = guest.free_head; +#endif + guest.num_free--; + + desc = ring.desc; + desc[head].flags = VRING_DESC_F_NEXT; + desc[head].addr = (unsigned long)(void *)buf; + desc[head].len = len; + /* We do it like this to simulate the way + * we'd have to flip it if we had multiple + * descriptors. 
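+ * A real multi-descriptor chain would leave VRING_DESC_F_NEXT set on
+ * every descriptor except the final one.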
+ */ + desc[head].flags &= ~VRING_DESC_F_NEXT; +#ifndef INORDER + guest.free_head = desc[head].next; +#endif + + data[head].data = datap; + +#ifdef RING_POLL + /* Barrier A (for pairing) */ + smp_release(); + avail = guest.avail_idx++; + ring.avail->ring[avail & (ring_size - 1)] = + (head | (avail & ~(ring_size - 1))) ^ 0x8000; +#else +#ifndef INORDER + /* Barrier A (for pairing) */ + smp_release(); + avail = (ring_size - 1) & (guest.avail_idx++); + ring.avail->ring[avail] = head; +#endif + /* Barrier A (for pairing) */ + smp_release(); +#endif + ring.avail->idx = guest.avail_idx; + return 0; +} + +void *get_buf(unsigned *lenp, void **bufp) +{ + unsigned head; + unsigned index; + void *datap; + +#ifdef RING_POLL + head = (ring_size - 1) & guest.last_used_idx; + index = ring.used->ring[head].id; + if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1)) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); + index &= ring_size - 1; +#else + if (ring.used->idx == guest.last_used_idx) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); +#ifdef INORDER + head = (ring_size - 1) & guest.last_used_idx; + index = head; +#else + head = (ring_size - 1) & guest.last_used_idx; + index = ring.used->ring[head].id; +#endif + +#endif +#ifdef INORDER + *lenp = ring.desc[index].len; +#else + *lenp = ring.used->ring[head].len; +#endif + datap = data[index].data; + *bufp = (void*)(unsigned long)ring.desc[index].addr; + data[index].data = NULL; +#ifndef INORDER + ring.desc[index].next = guest.free_head; + guest.free_head = index; +#endif + guest.num_free++; + guest.last_used_idx++; + return datap; +} + +bool used_empty() +{ + unsigned short last_used_idx = guest.last_used_idx; +#ifdef RING_POLL + unsigned short head = last_used_idx & (ring_size - 1); + unsigned index = ring.used->ring[head].id; + + return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1); +#else + return ring.used->idx == last_used_idx; +#endif +} + +void disable_call() +{ + /* Doing nothing to disable calls might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_call() +{ + vring_used_event(&ring) = guest.last_used_idx; + /* Flush call index write */ + /* Barrier D (for pairing) */ + smp_mb(); + return used_empty(); +} + +void kick_available(void) +{ + bool need; + + /* Flush in previous flags write */ + /* Barrier C (for pairing) */ + smp_mb(); + need = vring_need_event(vring_avail_event(&ring), + guest.avail_idx, + guest.kicked_avail_idx); + + guest.kicked_avail_idx = guest.avail_idx; + if (need) + kick(); +} + +/* host side */ +void disable_kick() +{ + /* Doing nothing to disable kicks might cause + * extra interrupts, but reduces the number of cache misses. 
+ */ +} + +bool enable_kick() +{ + vring_avail_event(&ring) = host.used_idx; + /* Barrier C (for pairing) */ + smp_mb(); + return avail_empty(); +} + +bool avail_empty() +{ + unsigned head = host.used_idx; +#ifdef RING_POLL + unsigned index = ring.avail->ring[head & (ring_size - 1)]; + + return ((index ^ head ^ 0x8000) & ~(ring_size - 1)); +#else + return head == ring.avail->idx; +#endif +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + unsigned used_idx = host.used_idx; + struct vring_desc *desc; + unsigned head; + +#ifdef RING_POLL + head = ring.avail->ring[used_idx & (ring_size - 1)]; + if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1)) + return false; + /* Barrier A (for pairing) */ + smp_acquire(); + + used_idx &= ring_size - 1; + desc = &ring.desc[head & (ring_size - 1)]; +#else + if (used_idx == ring.avail->idx) + return false; + + /* Barrier A (for pairing) */ + smp_acquire(); + + used_idx &= ring_size - 1; +#ifdef INORDER + head = used_idx; +#else + head = ring.avail->ring[used_idx]; +#endif + desc = &ring.desc[head]; +#endif + + *lenp = desc->len; + *bufp = (void *)(unsigned long)desc->addr; + +#ifdef INORDER + desc->len = desc->len - 1; +#else + /* now update used ring */ + ring.used->ring[used_idx].id = head; + ring.used->ring[used_idx].len = desc->len - 1; +#endif + /* Barrier B (for pairing) */ + smp_release(); + host.used_idx++; + ring.used->idx = host.used_idx; + + return true; +} + +void call_used(void) +{ + bool need; + + /* Flush in previous flags write */ + /* Barrier D (for pairing) */ + smp_mb(); + need = vring_need_event(vring_used_event(&ring), + host.used_idx, + host.called_used_idx); + + host.called_used_idx = host.used_idx; + if (need) + call(); +} diff --git a/tools/virtio/ringtest/virtio_ring_inorder.c b/tools/virtio/ringtest/virtio_ring_inorder.c new file mode 100644 index 000000000..2438ca58a --- /dev/null +++ b/tools/virtio/ringtest/virtio_ring_inorder.c @@ -0,0 +1,2 @@ +#define INORDER 1 +#include "virtio_ring_0_9.c" diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c new file mode 100644 index 000000000..84fc2c557 --- /dev/null +++ b/tools/virtio/ringtest/virtio_ring_poll.c @@ -0,0 +1,2 @@ +#define RING_POLL 1 +#include "virtio_ring_0_9.c" diff --git a/tools/virtio/uapi/linux/uio.h b/tools/virtio/uapi/linux/uio.h new file mode 100644 index 000000000..7230e9002 --- /dev/null +++ b/tools/virtio/uapi/linux/uio.h @@ -0,0 +1 @@ +#include <sys/uio.h> diff --git a/tools/virtio/uapi/linux/virtio_config.h b/tools/virtio/uapi/linux/virtio_config.h new file mode 100644 index 000000000..4c86675f0 --- /dev/null +++ b/tools/virtio/uapi/linux/virtio_config.h @@ -0,0 +1 @@ +#include "../../../../include/uapi/linux/virtio_config.h" diff --git a/tools/virtio/uapi/linux/virtio_ring.h b/tools/virtio/uapi/linux/virtio_ring.h new file mode 100644 index 000000000..cf50b2e5f --- /dev/null +++ b/tools/virtio/uapi/linux/virtio_ring.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef VIRTIO_RING_H +#define VIRTIO_RING_H +#include "../../../../include/uapi/linux/virtio_ring.h" +#endif /* VIRTIO_RING_H */ diff --git a/tools/virtio/uapi/linux/virtio_types.h b/tools/virtio/uapi/linux/virtio_types.h new file mode 100644 index 000000000..e7a1096e7 --- /dev/null +++ b/tools/virtio/uapi/linux/virtio_types.h @@ -0,0 +1 @@ +#include "../../include/uapi/linux/virtio_types.h" diff --git a/tools/virtio/vhost_test/Makefile b/tools/virtio/vhost_test/Makefile new file mode 100644 index 000000000..a1d35b81b --- /dev/null +++ 
b/tools/virtio/vhost_test/Makefile
@@ -0,0 +1,2 @@
+obj-m += vhost_test.o
+EXTRA_CFLAGS += -Idrivers/vhost
diff --git a/tools/virtio/vhost_test/vhost_test.c b/tools/virtio/vhost_test/vhost_test.c
new file mode 100644
index 000000000..18735189e
--- /dev/null
+++ b/tools/virtio/vhost_test/vhost_test.c
@@ -0,0 +1 @@
+#include "test.c"
diff --git a/tools/virtio/virtio-trace/Makefile b/tools/virtio/virtio-trace/Makefile
new file mode 100644
index 000000000..7843ebcda
--- /dev/null
+++ b/tools/virtio/virtio-trace/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+CC = gcc
+CFLAGS = -O2 -Wall -pthread
+
+all: trace-agent
+
+.c.o:
+	$(CC) $(CFLAGS) -c $^ -o $@
+
+trace-agent: trace-agent.o trace-agent-ctl.o trace-agent-rw.o
+	$(CC) $(CFLAGS) -o $@ $^
+
+clean:
+	rm -f *.o trace-agent
diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README
new file mode 100644
index 000000000..b64845b82
--- /dev/null
+++ b/tools/virtio/virtio-trace/README
@@ -0,0 +1,118 @@
+Trace Agent for virtio-trace
+============================
+
+The trace agent is a user-space tool for sending trace data from a guest to a
+host with low overhead. It has the following functions:
+ - splice a page of the ring buffer to read_pipe without copying memory
+ - splice the page from write_pipe to virtio-console without copying memory
+ - write trace data to stdout when the -o option is given
+ - start and stop on orders from the host
+
+The trace agent operates as follows:
+ 1) Initialize all structures.
+ 2) Create one read/write thread per CPU, each bound to its CPU.
+    The read/write threads then wait for a start order.
+ 3) A controller thread polls for a start order from the host.
+ 4) When the controller receives a start order from the host, it wakes the
+    read/write threads.
+ 5) The read/write threads start reading trace data from the ring buffers
+    and writing it to virtio-serial.
+ 6) When the controller receives a stop order from the host, the read/write
+    threads stop reading trace data.
+
+
+Files
+=====
+
+README: this file
+Makefile: Makefile of trace agent for virtio-trace
+trace-agent.c: includes main function, sets up for operating trace agent
+trace-agent.h: includes all structures and some macros
+trace-agent-ctl.c: includes controller function for read/write threads
+trace-agent-rw.c: includes read/write threads function
+
+
+Setup
+=====
+
+To use this trace agent for virtio-trace, some virtio-serial interfaces must
+be prepared.
+
+1) Make FIFOs in the host
+   virtio-trace uses virtio-serial pipes as the trace data paths (one per
+CPU) plus a control path, so FIFOs (named pipes) should be created as
+follows:
+   # mkdir /tmp/virtio-trace/
+   # mkfifo /tmp/virtio-trace/trace-path-cpu{0,1,2,...,X}.{in,out}
+   # mkfifo /tmp/virtio-trace/agent-ctl-path.{in,out}
+
+For example, if a guest uses three CPUs, the names are
+   trace-path-cpu{0,1,2}.{in,out}
+and
+   agent-ctl-path.{in,out}.
+
+2) Set up the virtio-serial pipes in the host
+   Add qemu options to use the virtio-serial pipes.
+
+     ##virtio-serial device##
+     -device virtio-serial-pci,id=virtio-serial0\
+     ##control path##
+     -chardev pipe,id=charchannel0,path=/tmp/virtio-trace/agent-ctl-path\
+     -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,\
+      id=channel0,name=agent-ctl-path\
+     ##data path##
+     -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\
+     -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\
+      id=channel1,name=trace-path-cpu0\
+     ...
+
+If you manage guests with libvirt, add the following tags to the domain XML
+files. libvirt then passes the same command options to qemu.
+
+ <channel type='pipe'>
+   <source path='/tmp/virtio-trace/agent-ctl-path'/>
+   <target type='virtio' name='agent-ctl-path'/>
+   <address type='virtio-serial' controller='0' bus='0' port='0'/>
+ </channel>
+ <channel type='pipe'>
+   <source path='/tmp/virtio-trace/trace-path-cpu0'/>
+   <target type='virtio' name='trace-path-cpu0'/>
+   <address type='virtio-serial' controller='0' bus='0' port='1'/>
+ </channel>
+ ...
+Here, the channel names are restricted to trace-path-cpuX and agent-ctl-path.
+For example, if a guest uses three CPUs, the names should be trace-path-cpu0,
+trace-path-cpu1, trace-path-cpu2, and agent-ctl-path.
+
+3) Boot the guest
+   The channels appear as character devices under /dev/virtio-ports/ in the
+guest.
+
+
+Run
+===
+
+0) Build the trace agent in the guest
+   $ make
+
+1) Enable ftrace in the guest
+ <Example>
+	# echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+
+2) Run the trace agent in the guest
+   The agent must be run as root.
+	# ./trace-agent
+The read/write threads in the agent then wait for a start order from the
+host. With the -o option, trace data are written to stdout in the guest
+instead.
+
+3) Open the FIFOs in the host
+	# cat /tmp/virtio-trace/trace-path-cpu0.out
+If the host does not open these, trace data get stuck in the virtio buffers,
+and the guest will stall because the QEMU chardevs block. This blocking mode
+may be addressed in the future.
+
+4) Start reading trace data by ordering from the host
+   The host injects a read-start order into the guest via virtio-serial.
+	# echo 1 > /tmp/virtio-trace/agent-ctl-path.in
+
+5) Stop reading trace data by ordering from the host
+   The host injects a read-stop order into the guest via virtio-serial.
+	# echo 0 > /tmp/virtio-trace/agent-ctl-path.in
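
As a companion to steps 3)-5), the host side can also be driven
programmatically. The following is a minimal, illustrative sketch (it is not
part of the agent; the FIFO paths follow the Setup section above, and error
handling is abbreviated):

    /* Host-side driver sketch: start tracing, drain one CPU's data
     * FIFO for a while, then stop tracing.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
    	char buf[4096];
    	ssize_t len;
    	int n = 0;
    	int ctl = open("/tmp/virtio-trace/agent-ctl-path.in", O_WRONLY);
    	int data = open("/tmp/virtio-trace/trace-path-cpu0.out", O_RDONLY);

    	if (ctl < 0 || data < 0) {
    		perror("open");
    		return 1;
    	}

    	/* "1\n" starts the read/write threads (same as: echo 1 > ...) */
    	write(ctl, "1\n", 2);

    	/* Drain some trace data; a real tool would loop until done. */
    	while (n++ < 1024 && (len = read(data, buf, sizeof(buf))) > 0)
    		fwrite(buf, 1, len, stdout);

    	/* "0\n" stops the read/write threads (same as: echo 0 > ...) */
    	write(ctl, "0\n", 2);

    	close(data);
    	close(ctl);
    	return 0;
    }

Writing "1\n" and "0\n" matches what `echo 1` and `echo 0` produce, which is
why the controller in trace-agent-ctl.c (next file) checks for a two-byte
message.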
diff --git a/tools/virtio/virtio-trace/trace-agent-ctl.c b/tools/virtio/virtio-trace/trace-agent-ctl.c
new file mode 100644
index 000000000..a2d0403c4
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent-ctl.c
@@ -0,0 +1,137 @@
+/*
+ * Controller of read/write threads for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ *            Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define HOST_MSG_SIZE 256
+#define EVENT_WAIT_MSEC 100
+
+static volatile sig_atomic_t global_signal_val;
+bool global_sig_receive;	/* default false */
+bool global_run_operation;	/* default false */
+
+/* Handle SIGTERM/SIGINT/SIGQUIT to exit */
+static void signal_handler(int sig)
+{
+	global_signal_val = sig;
+}
+
+int rw_ctl_init(const char *ctl_path)
+{
+	int ctl_fd;
+
+	ctl_fd = open(ctl_path, O_RDONLY);
+	if (ctl_fd == -1) {
+		pr_err("Cannot open ctl_fd\n");
+		goto error;
+	}
+
+	return ctl_fd;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static int wait_order(int ctl_fd)
+{
+	struct pollfd poll_fd;
+	int ret = 0;
+
+	while (!global_sig_receive) {
+		poll_fd.fd = ctl_fd;
+		poll_fd.events = POLLIN;
+
+		ret = poll(&poll_fd, 1, EVENT_WAIT_MSEC);
+
+		if (global_signal_val) {
+			global_sig_receive = true;
+			pr_info("Received interrupt %d\n", global_signal_val);
+
+			/* Wake rw threads if they are sleeping */
+			if (!global_run_operation)
+				pthread_cond_broadcast(&cond_wakeup);
+
+			ret = -1;
+			break;
+		}
+
+		if (ret < 0) {
+			pr_err("Polling error\n");
+			goto error;
+		}
+
+		if (ret)
+			break;
+	}
+
+	return ret;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Control the read/write threads by toggling global_run_operation
+ */
+void *rw_ctl_loop(int ctl_fd)
+{
+	ssize_t rlen;
+	char buf[HOST_MSG_SIZE];
+	int ret;
+
+	/* Set up signal handlers */
+	signal(SIGTERM, signal_handler);
+	signal(SIGINT, signal_handler);
+	signal(SIGQUIT, signal_handler);
+
+	while (!global_sig_receive) {
+
+		ret = wait_order(ctl_fd);
+		if (ret < 0)
+			break;
+
+		rlen = read(ctl_fd, buf, sizeof(buf) - 1);
+		if (rlen < 0) {
+			pr_err("read data error in ctl thread\n");
+			goto error;
+		}
+		/* NUL-terminate so the invalid-notification %s below is safe */
+		buf[rlen] = '\0';
+
+		/* rlen == 2 because the host writes "1\n" (e.g. via echo) */
+		if (rlen == 2 && buf[0] == '1') {
+			/*
+			 * If the host writes '1' to the control path,
+			 * the controller wakes all read/write threads.
+			 */
+			global_run_operation = true;
+			pthread_cond_broadcast(&cond_wakeup);
+			pr_debug("Wake up all read/write threads\n");
+		} else if (rlen == 2 && buf[0] == '0') {
+			/*
+			 * If the host writes '0' to the control path, the
+			 * read/write threads wait for the next notification
+			 * from the host.
+			 */
+			global_run_operation = false;
+			pr_debug("Stop all read/write threads\n");
+		} else
+			pr_info("Invalid host notification: %s\n", buf);
+	}
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
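
On the other side of this handshake, the read/write threads block on
cond_wakeup until global_run_operation becomes true. A self-contained sketch
of that condition-variable pattern follows (the names are shortened stand-ins
for the globals declared in trace-agent.h; the canonical form shown here
re-checks the predicate in a loop, which guards against spurious wakeups):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool run_operation;	/* stands in for global_run_operation */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t wakeup = PTHREAD_COND_INITIALIZER;

    static void *worker(void *arg)
    {
    	(void)arg;

    	pthread_mutex_lock(&lock);
    	while (!run_operation)	/* cond waits can wake spuriously */
    		pthread_cond_wait(&wakeup, &lock);
    	pthread_mutex_unlock(&lock);

    	puts("worker running");	/* the splice loop would start here */
    	return NULL;
    }

    int main(void)
    {
    	pthread_t t;

    	pthread_create(&t, NULL, worker, NULL);

    	pthread_mutex_lock(&lock);
    	run_operation = true;	/* as if the controller saw '1' */
    	pthread_cond_broadcast(&wakeup);
    	pthread_mutex_unlock(&lock);

    	pthread_join(t, NULL);
    	return 0;
    }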
diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c
new file mode 100644
index 000000000..3aace5ea4
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent-rw.c
@@ -0,0 +1,192 @@
+/*
+ * Read/write thread of a guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ *            Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include "trace-agent.h"
+
+#define READ_WAIT_USEC 100000
+
+void *rw_thread_info_new(void)
+{
+	struct rw_thread_info *rw_ti;
+
+	rw_ti = zalloc(sizeof(struct rw_thread_info));
+	if (rw_ti == NULL) {
+		pr_err("rw_thread_info zalloc error\n");
+		exit(EXIT_FAILURE);
+	}
+
+	rw_ti->cpu_num = -1;
+	rw_ti->in_fd = -1;
+	rw_ti->out_fd = -1;
+	rw_ti->read_pipe = -1;
+	rw_ti->write_pipe = -1;
+	rw_ti->pipe_size = PIPE_INIT;
+
+	return rw_ti;
+}
+
+void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+			bool stdout_flag, unsigned long pipe_size,
+			struct rw_thread_info *rw_ti)
+{
+	int data_pipe[2];
+
+	rw_ti->cpu_num = cpu;
+
+	/* set read(input) fd */
+	rw_ti->in_fd = open(in_path, O_RDONLY);
+	if (rw_ti->in_fd == -1) {
+		pr_err("Could not open in_fd (CPU:%d)\n", cpu);
+		goto error;
+	}
+
+	/* set write(output) fd */
+	if (!stdout_flag) {
+		/* virtio-serial output mode */
+		rw_ti->out_fd = open(out_path, O_WRONLY);
+		if (rw_ti->out_fd == -1) {
+			pr_err("Could not open out_fd (CPU:%d)\n", cpu);
+			goto error;
+		}
+	} else
+		/* stdout mode */
+		rw_ti->out_fd = STDOUT_FILENO;
+
+	if (pipe2(data_pipe, O_NONBLOCK) < 0) {
+		pr_err("Could not create pipe in rw-thread(%d)\n", cpu);
+		goto error;
+	}
+
+	/*
+	 * The default pipe size is 64KB (see fs/pipe.c). Enlarge the pipe
+	 * so that trace data can be read and written quickly.
+	 */
+	if (fcntl(*data_pipe, F_SETPIPE_SZ, pipe_size) < 0) {
+		pr_err("Could not change pipe size in rw-thread(%d)\n", cpu);
+		goto error;
+	}
+
+	/* read_pipe is the write end of the pipe, write_pipe the read end */
+	rw_ti->read_pipe = data_pipe[1];
+	rw_ti->write_pipe = data_pipe[0];
+	rw_ti->pipe_size = pipe_size;
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+/* Bind a thread to a cpu */
+static void bind_cpu(int cpu_num)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu_num, &mask);
+
+	/* a pid of zero binds the calling thread to cpu_num */
+	if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
+		pr_err("Could not set CPU#%d affinity\n", (int)cpu_num);
+}
+
+static void *rw_thread_main(void *thread_info)
+{
+	ssize_t rlen, wlen;
+	ssize_t ret;
+	struct rw_thread_info *ts = (struct rw_thread_info *)thread_info;
+
+	bind_cpu(ts->cpu_num);
+
+	while (1) {
+		/* Wait for a read order of trace data from the host */
+		if (!global_run_operation) {
+			pthread_mutex_lock(&mutex_notify);
+			pthread_cond_wait(&cond_wakeup, &mutex_notify);
+			pthread_mutex_unlock(&mutex_notify);
+		}
+
+		if (global_sig_receive)
+			break;
+
+		/*
+		 * Each thread reads the trace_pipe_raw of the CPU it is
+		 * bound to, so the threads do not contend with each other.
+		 */
+		rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL,
+				ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE);
+
+		if (rlen < 0) {
+			pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num);
+			goto error;
+		} else if (rlen == 0) {
+			/*
+			 * splice() returns 0 when no trace data are
+			 * available, or when less than a page can be read,
+			 * so wait for the ring buffer to fill up.
+			 */
+			usleep(READ_WAIT_USEC);
+			pr_debug("Read retry(cpu:%d)\n", ts->cpu_num);
+			continue;
+		}
+
+		wlen = 0;
+
+		do {
+			ret = splice(ts->write_pipe, NULL, ts->out_fd, NULL,
+					rlen - wlen,
+					SPLICE_F_MOVE | SPLICE_F_MORE);
+
+			if (ret < 0) {
+				pr_err("Splice_write in rw-thread(%d)\n",
+						ts->cpu_num);
+				goto error;
+			} else if (ret == 0)
+				/*
+				 * When the host reader cannot keep up with
+				 * the trace data, the guest stalls. This is
+				 * because the char dev in QEMU does not
+				 * support non-blocking mode; the writer may
+				 * sleep in that case. This sleep can be
+				 * removed once non-blocking mode is
+				 * supported.
+				 */
+				sleep(1);
+			wlen += ret;
+		} while (wlen < rlen);
+	}
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+
+pthread_t rw_thread_run(struct rw_thread_info *rw_ti)
+{
+	int ret;
+	pthread_t rw_thread_per_cpu;
+
+	ret = pthread_create(&rw_thread_per_cpu, NULL, rw_thread_main, rw_ti);
+	if (ret != 0) {
+		pr_err("Could not create a rw thread(%d)\n", rw_ti->cpu_num);
+		exit(EXIT_FAILURE);
+	}
+
+	return rw_thread_per_cpu;
+}
diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c
new file mode 100644
index 000000000..0a0a7dd4e
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.c
@@ -0,0 +1,270 @@
+/*
+ * Guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ *            Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE))
+#define PIPE_DEF_BUFS 16
+#define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS)
+#define PIPE_MAX_SIZE (1024*1024)
+#define READ_PATH_FMT \
+	"/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
+#define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d"
+#define CTL_PATH "/dev/virtio-ports/agent-ctl-path"
+
+pthread_mutex_t mutex_notify = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond_wakeup = PTHREAD_COND_INITIALIZER;
+
+static int get_total_cpus(void)
+{
+	int nr_cpus = (int)sysconf(_SC_NPROCESSORS_CONF);
+
+	if (nr_cpus <= 0) {
+		pr_err("Could not read cpus\n");
+		goto error;
+	} else if (nr_cpus > MAX_CPUS) {
+		pr_err("Exceed max cpus(%d)\n", (int)MAX_CPUS);
+		goto error;
+	}
+
+	return nr_cpus;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static void *agent_info_new(void)
+{
+	struct agent_info *s;
+	int i;
+
+	s = zalloc(sizeof(struct agent_info));
+	if (s == NULL) {
+		pr_err("agent_info zalloc error\n");
+		exit(EXIT_FAILURE);
+	}
+
+	s->pipe_size = PIPE_INIT;
+	s->use_stdout = false;
+	s->cpus = get_total_cpus();
+	s->ctl_fd = -1;
+
+	/* read/write threads init */
+	for (i = 0; i < s->cpus; i++)
+		s->rw_ti[i] = rw_thread_info_new();
+
+	return s;
+}
+
+static unsigned long parse_size(const char *arg)
+{
+	unsigned long value, round;
+	char *ptr;
+
+	value = strtoul(arg, &ptr, 10);
+	switch (*ptr) {
+	case 'K': case 'k':
+		value <<= 10;
+		break;
+	case 'M': case 'm':
+		value <<= 20;
+		break;
+	default:
+		break;
+	}
+
+	if (value > PIPE_MAX_SIZE) {
+		pr_err("Pipe size must be at most 1MB\n");
+		goto error;
+	} else if (value < PIPE_MIN_SIZE) {
+		pr_err("Pipe size must be at least 64KB\n");
+		goto error;
+	}
+
+	/* Round the buffer size down to a multiple of the page size */
+	round = value & (PAGE_SIZE - 1);
+	value = value - round;
+
+	return value;
+error:
+	return 0;
+}
+
+static void usage(char const *prg)
+{
+	pr_err("usage: %s [-h] [-o] [-s <size of pipe>]\n", prg);
+}
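
For illustration, the effect of parse_size() above can be seen in a
standalone program that mirrors its suffix handling and page-alignment
arithmetic (a sketch; the bounds checks are omitted for brevity):

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    /* Mirrors parse_size(): decimal value, optional K/M suffix,
     * rounded down to a multiple of the page size.
     */
    static unsigned long parse_size_demo(const char *arg)
    {
    	unsigned long page = (unsigned long)sysconf(_SC_PAGE_SIZE);
    	char *ptr;
    	unsigned long value = strtoul(arg, &ptr, 10);

    	if (*ptr == 'K' || *ptr == 'k')
    		value <<= 10;
    	else if (*ptr == 'M' || *ptr == 'm')
    		value <<= 20;

    	return value & ~(page - 1);	/* round down to page multiple */
    }

    int main(void)
    {
    	printf("%lu\n", parse_size_demo("128K"));	/* 131072 */
    	printf("%lu\n", parse_size_demo("1M"));		/* 1048576 */
    	printf("%lu\n", parse_size_demo("70000"));	/* 69632 on 4KB pages */
    	return 0;
    }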
+
+static const char *make_path(int cpu_num, bool this_is_write_path)
+{
+	int ret;
+	char *buf;
+
+	buf = zalloc(PATH_MAX);
+	if (buf == NULL) {
+		pr_err("Could not allocate buffer\n");
+		goto error;
+	}
+
+	if (this_is_write_path)
+		/* write(output) path */
+		ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
+	else
+		/* read(input) path */
+		ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+
+	if (ret <= 0) {
+		pr_err("Failed to generate %s path(CPU#%d):%d\n",
+			this_is_write_path ? "write" : "read", cpu_num, ret);
+		goto error;
+	}
+
+	return buf;
+
+error:
+	free(buf);
+	return NULL;
+}
+
+static const char *make_input_path(int cpu_num)
+{
+	return make_path(cpu_num, false);
+}
+
+static const char *make_output_path(int cpu_num)
+{
+	return make_path(cpu_num, true);
+}
+
+static void *agent_info_init(struct agent_info *s)
+{
+	int cpu;
+	const char *in_path = NULL;
+	const char *out_path = NULL;
+
+	/* init read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++) {
+		/* set read(input) path per read/write thread */
+		in_path = make_input_path(cpu);
+		if (in_path == NULL)
+			goto error;
+
+		/* set write(output) path per read/write thread */
+		if (!s->use_stdout) {
+			out_path = make_output_path(cpu);
+			if (out_path == NULL)
+				goto error;
+		} else
+			/* stdout mode */
+			pr_debug("stdout mode\n");
+
+		rw_thread_init(cpu, in_path, out_path, s->use_stdout,
+				s->pipe_size, s->rw_ti[cpu]);
+	}
+
+	/* init controller of read/write threads */
+	s->ctl_fd = rw_ctl_init((const char *)CTL_PATH);
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static void *parse_args(int argc, char *argv[], struct agent_info *s)
+{
+	int cmd;
+	unsigned long size;
+
+	while ((cmd = getopt(argc, argv, "hos:")) != -1) {
+		switch (cmd) {
+		/* stdout mode */
+		case 'o':
+			s->use_stdout = true;
+			break;
+		/* size of pipe */
+		case 's':
+			size = parse_size(optarg);
+			if (size == 0)
+				goto error;
+			s->pipe_size = size;
+			break;
+		case 'h':
+		default:
+			usage(argv[0]);
+			goto error;
+		}
+	}
+
+	agent_info_init(s);
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static void agent_main_loop(struct agent_info *s)
+{
+	int cpu;
+	pthread_t rw_thread_per_cpu[MAX_CPUS];
+
+	/* Start all read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++)
+		rw_thread_per_cpu[cpu] = rw_thread_run(s->rw_ti[cpu]);
+
+	rw_ctl_loop(s->ctl_fd);
+
+	/* Finish all read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++) {
+		int ret;
+
+		ret = pthread_join(rw_thread_per_cpu[cpu], NULL);
+		if (ret != 0) {
+			pr_err("pthread_join() error:%d (cpu %d)\n", ret, cpu);
+			exit(EXIT_FAILURE);
+		}
+	}
+}
+
+static void agent_info_free(struct agent_info *s)
+{
+	int i;
+
+	close(s->ctl_fd);
+	for (i = 0; i < s->cpus; i++) {
+		close(s->rw_ti[i]->in_fd);
+		close(s->rw_ti[i]->out_fd);
+		close(s->rw_ti[i]->read_pipe);
+		close(s->rw_ti[i]->write_pipe);
+		free(s->rw_ti[i]);
+	}
+	free(s);
+}
+
+int main(int argc, char *argv[])
+{
+	struct agent_info *s = NULL;
+
+	s = agent_info_new();
+	parse_args(argc, argv, s);
+
+	agent_main_loop(s);
+
+	agent_info_free(s);
+
+	return 0;
+}
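
The heart of the read/write threads is the two-splice relay: trace data move
from the per-CPU trace file into a pipe, then from the pipe to the output fd,
without being copied through user space. Stripped of the agent's threading
and control logic, the pattern looks like this (an illustrative sketch; run
it with stdout redirected to a file or pipe, since splice() cannot write to a
terminal):

    /* Minimal two-splice zero-copy relay, the pattern used by
     * rw_thread_main() above: in_fd -> pipe -> out_fd.
     */
    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char *argv[])
    {
    	int p[2];
    	ssize_t n, m;

    	if (argc != 2) {
    		fprintf(stderr, "usage: %s <file>\n", argv[0]);
    		return 1;
    	}

    	int in = open(argv[1], O_RDONLY);
    	if (in < 0 || pipe(p) < 0) {
    		perror("setup");
    		return 1;
    	}

    	/* Move up to 64KB at a time; pages are moved, not copied. */
    	while ((n = splice(in, NULL, p[1], NULL, 65536,
    			   SPLICE_F_MOVE | SPLICE_F_MORE)) > 0) {
    		for (ssize_t off = 0; off < n; off += m) {
    			m = splice(p[0], NULL, STDOUT_FILENO, NULL, n - off,
    				   SPLICE_F_MOVE | SPLICE_F_MORE);
    			if (m <= 0) {
    				perror("splice out");
    				return 1;
    			}
    		}
    	}

    	close(in);
    	return 0;
    }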
diff --git a/tools/virtio/virtio-trace/trace-agent.h b/tools/virtio/virtio-trace/trace-agent.h
new file mode 100644
index 000000000..e67885969
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRACE_AGENT_H__
+#define __TRACE_AGENT_H__
+#include <pthread.h>
+#include <stdbool.h>
+
+#define MAX_CPUS 256
+#define PIPE_INIT (1024*1024)
+
+/*
+ * agent_info - structure managing total information of guest agent
+ * @pipe_size: size of pipe (default 1MB)
+ * @use_stdout: set to true when the -o option is given (default false)
+ * @cpus: total number of CPUs
+ * @ctl_fd: fd of control path, /dev/virtio-ports/agent-ctl-path
+ * @rw_ti: structure managing information of read/write threads
+ */
+struct agent_info {
+	unsigned long pipe_size;
+	bool use_stdout;
+	int cpus;
+	int ctl_fd;
+	struct rw_thread_info *rw_ti[MAX_CPUS];
+};
+
+/*
+ * rw_thread_info - structure managing a read/write thread per cpu
+ * @cpu_num: cpu number operating this read/write thread
+ * @in_fd: fd of the trace-data read path for cpu_num
+ * @out_fd: fd of the trace-data write path for cpu_num
+ * @read_pipe: fd of read pipe
+ * @write_pipe: fd of write pipe
+ * @pipe_size: size of pipe (default 1MB)
+ */
+struct rw_thread_info {
+	int cpu_num;
+	int in_fd;
+	int out_fd;
+	int read_pipe;
+	int write_pipe;
+	unsigned long pipe_size;
+};
+
+/* used for stopping rw threads */
+extern bool global_sig_receive;
+
+/* used for notification */
+extern bool global_run_operation;
+extern pthread_mutex_t mutex_notify;
+extern pthread_cond_t cond_wakeup;
+
+/* for controller of read/write threads */
+extern int rw_ctl_init(const char *ctl_path);
+extern void *rw_ctl_loop(int ctl_fd);
+
+/* for trace read/write thread */
+extern void *rw_thread_info_new(void);
+extern void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+			bool stdout_flag, unsigned long pipe_size,
+			struct rw_thread_info *rw_ti);
+extern pthread_t rw_thread_run(struct rw_thread_info *rw_ti);
+
+static inline void *zalloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+#define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+
+#endif /*__TRACE_AGENT_H__*/
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
new file mode 100644
index 000000000..b427def67
--- /dev/null
+++ b/tools/virtio/virtio_test.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <string.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <linux/virtio_types.h>
+#include <linux/vhost.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ring.h>
+#include "../../drivers/vhost/test.h"
+
+/* Unused */
+void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+
+struct vq_info {
+	int kick;
+	int call;
+	int num;
+	int idx;
+	void *ring;
+	/* copy used for control */
+	struct vring vring;
+	struct virtqueue *vq;
+};
+
+struct vdev_info {
+	struct virtio_device vdev;
+	int control;
+	struct pollfd fds[1];
+	struct vq_info vqs[1];
+	int nvqs;
+	void *buf;
+	size_t buf_size;
+	struct vhost_memory *mem;
+};
+
+bool vq_notify(struct virtqueue *vq)
+{
+	struct vq_info *info = vq->priv;
+	unsigned long long v = 1;
+	int r;
+	r = write(info->kick, &v, sizeof v);
+	assert(r == sizeof v);
+	return true;
+}
+
+void vq_callback(struct virtqueue *vq)
+{
+}
+
+
+void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
+{
+	struct vhost_vring_state state = { .index = info->idx };
+	struct vhost_vring_file file = { .index = info->idx };
+	unsigned long long features = dev->vdev.features;
+	struct vhost_vring_addr addr = {
+		.index = info->idx,
+		.desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
+		.avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
+		.used_user_addr = (uint64_t)(unsigned long)info->vring.used,
+	};
+	int r;
+	r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+	assert(r >= 0);
+	state.num = info->vring.num;
+	r = ioctl(dev->control,
VHOST_SET_VRING_NUM, &state); + assert(r >= 0); + state.num = 0; + r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state); + assert(r >= 0); + r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr); + assert(r >= 0); + file.fd = info->kick; + r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); + assert(r >= 0); + file.fd = info->call; + r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file); + assert(r >= 0); +} + +static void vq_info_add(struct vdev_info *dev, int num) +{ + struct vq_info *info = &dev->vqs[dev->nvqs]; + int r; + info->idx = dev->nvqs; + info->kick = eventfd(0, EFD_NONBLOCK); + info->call = eventfd(0, EFD_NONBLOCK); + r = posix_memalign(&info->ring, 4096, vring_size(num, 4096)); + assert(r >= 0); + memset(info->ring, 0, vring_size(num, 4096)); + vring_init(&info->vring, num, info->ring, 4096); + info->vq = vring_new_virtqueue(info->idx, + info->vring.num, 4096, &dev->vdev, + true, false, info->ring, + vq_notify, vq_callback, "test"); + assert(info->vq); + info->vq->priv = info; + vhost_vq_setup(dev, info); + dev->fds[info->idx].fd = info->call; + dev->fds[info->idx].events = POLLIN; + dev->nvqs++; +} + +static void vdev_info_init(struct vdev_info* dev, unsigned long long features) +{ + int r; + memset(dev, 0, sizeof *dev); + dev->vdev.features = features; + dev->buf_size = 1024; + dev->buf = malloc(dev->buf_size); + assert(dev->buf); + dev->control = open("/dev/vhost-test", O_RDWR); + assert(dev->control >= 0); + r = ioctl(dev->control, VHOST_SET_OWNER, NULL); + assert(r >= 0); + dev->mem = malloc(offsetof(struct vhost_memory, regions) + + sizeof dev->mem->regions[0]); + assert(dev->mem); + memset(dev->mem, 0, offsetof(struct vhost_memory, regions) + + sizeof dev->mem->regions[0]); + dev->mem->nregions = 1; + dev->mem->regions[0].guest_phys_addr = (long)dev->buf; + dev->mem->regions[0].userspace_addr = (long)dev->buf; + dev->mem->regions[0].memory_size = dev->buf_size; + r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); + assert(r >= 0); +} + +/* TODO: this is pretty bad: we get a cache line bounce + * for the wait queue on poll and another one on read, + * plus the read which is there just to clear the + * current state. 
*/
+static void wait_for_interrupt(struct vdev_info *dev)
+{
+	int i;
+	unsigned long long val;
+	poll(dev->fds, dev->nvqs, -1);
+	for (i = 0; i < dev->nvqs; ++i)
+		if (dev->fds[i].revents & POLLIN) {
+			read(dev->fds[i].fd, &val, sizeof val);
+		}
+}
+
+static void run_test(struct vdev_info *dev, struct vq_info *vq,
+		     bool delayed, int bufs)
+{
+	struct scatterlist sl;
+	long started = 0, completed = 0;
+	long completed_before;
+	int r, test = 1;
+	unsigned len;
+	long long spurious = 0;
+	r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+	assert(r >= 0);
+	for (;;) {
+		virtqueue_disable_cb(vq->vq);
+		completed_before = completed;
+		do {
+			if (started < bufs) {
+				sg_init_one(&sl, dev->buf, dev->buf_size);
+				r = virtqueue_add_outbuf(vq->vq, &sl, 1,
+							 dev->buf + started,
+							 GFP_ATOMIC);
+				if (likely(r == 0)) {
+					++started;
+					if (unlikely(!virtqueue_kick(vq->vq)))
+						r = -1;
+				}
+			} else
+				r = -1;
+
+			/* Flush out completed bufs if any */
+			if (virtqueue_get_buf(vq->vq, &len)) {
+				++completed;
+				r = 0;
+			}
+
+		} while (r == 0);
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		assert(started <= bufs);
+		if (completed == bufs)
+			break;
+		if (delayed) {
+			if (virtqueue_enable_cb_delayed(vq->vq))
+				wait_for_interrupt(dev);
+		} else {
+			if (virtqueue_enable_cb(vq->vq))
+				wait_for_interrupt(dev);
+		}
+	}
+	test = 0;
+	r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+	assert(r >= 0);
+	fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious);
+}
+
+const char optstring[] = "h";
+const struct option longopts[] = {
+	{
+		.name = "help",
+		.val = 'h',
+	},
+	{
+		.name = "event-idx",
+		.val = 'E',
+	},
+	{
+		.name = "no-event-idx",
+		.val = 'e',
+	},
+	{
+		.name = "indirect",
+		.val = 'I',
+	},
+	{
+		.name = "no-indirect",
+		.val = 'i',
+	},
+	{
+		.name = "virtio-1",
+		.val = '1',
+	},
+	{
+		.name = "no-virtio-1",
+		.val = '0',
+	},
+	{
+		.name = "delayed-interrupt",
+		.val = 'D',
+	},
+	{
+		.name = "no-delayed-interrupt",
+		.val = 'd',
+	},
+	{
+	}
+};
+
+static void help(void)
+{
+	fprintf(stderr, "Usage: virtio_test [--help]"
+		" [--no-indirect]"
+		" [--no-event-idx]"
+		" [--no-virtio-1]"
+		" [--delayed-interrupt]"
+		"\n");
+}
+
+int main(int argc, char **argv)
+{
+	struct vdev_info dev;
+	unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
+		(1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
+	int o;
+	bool delayed = false;
+
+	for (;;) {
+		o = getopt_long(argc, argv, optstring, longopts, NULL);
+		switch (o) {
+		case -1:
+			goto done;
+		case '?':
+			help();
+			exit(2);
+		case 'e':
+			features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
+			break;
+		case 'h':
+			help();
+			goto done;
+		case 'i':
+			features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
+			break;
+		case '0':
+			features &= ~(1ULL << VIRTIO_F_VERSION_1);
+			break;
+		case 'E':
+		case 'I':
+		case '1':
+			/* these features are already on by default */
+			break;
+		case 'D':
+			delayed = true;
+			break;
+		case 'd':
+			delayed = false;
+			break;
+		default:
+			assert(0);
+			break;
+		}
+	}
+
+done:
+	vdev_info_init(&dev, features);
+	vq_info_add(&dev, 256);
+	run_test(&dev, &dev.vqs[0], delayed, 0x100000);
+	return 0;
+}
diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
new file mode 100644
index 000000000..293653463
--- /dev/null
+++ b/tools/virtio/vringh_test.c
@@ -0,0 +1,751 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Simple test of virtio code, entirely in userspace.
*/ +#define _GNU_SOURCE +#include <sched.h> +#include <err.h> +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/virtio.h> +#include <linux/vringh.h> +#include <linux/virtio_ring.h> +#include <linux/virtio_config.h> +#include <linux/uaccess.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include <fcntl.h> + +#define USER_MEM (1024*1024) +void *__user_addr_min, *__user_addr_max; +void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; +static u64 user_addr_offset; + +#define RINGSIZE 256 +#define ALIGN 4096 + +static bool never_notify_host(struct virtqueue *vq) +{ + abort(); +} + +static void never_callback_guest(struct virtqueue *vq) +{ + abort(); +} + +static bool getrange_iov(struct vringh *vrh, u64 addr, struct vringh_range *r) +{ + if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset) + return false; + if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset) + return false; + + r->start = (u64)(unsigned long)__user_addr_min - user_addr_offset; + r->end_incl = (u64)(unsigned long)__user_addr_max - 1 - user_addr_offset; + r->offset = user_addr_offset; + return true; +} + +/* We return single byte ranges. */ +static bool getrange_slow(struct vringh *vrh, u64 addr, struct vringh_range *r) +{ + if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset) + return false; + if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset) + return false; + + r->start = addr; + r->end_incl = r->start; + r->offset = user_addr_offset; + return true; +} + +struct guest_virtio_device { + struct virtio_device vdev; + int to_host_fd; + unsigned long notifies; +}; + +static bool parallel_notify_host(struct virtqueue *vq) +{ + int rc; + struct guest_virtio_device *gvdev; + + gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev); + rc = write(gvdev->to_host_fd, "", 1); + if (rc < 0) + return false; + gvdev->notifies++; + return true; +} + +static bool no_notify_host(struct virtqueue *vq) +{ + return true; +} + +#define NUM_XFERS (10000000) + +/* We aim for two "distant" cpus. */ +static void find_cpus(unsigned int *first, unsigned int *last) +{ + unsigned int i; + + *first = -1U; + *last = 0; + for (i = 0; i < 4096; i++) { + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(i, &set); + if (sched_setaffinity(getpid(), sizeof(set), &set) == 0) { + if (i < *first) + *first = i; + if (i > *last) + *last = i; + } + } +} + +/* Opencoded version for fast mode */ +static inline int vringh_get_head(struct vringh *vrh, u16 *head) +{ + u16 avail_idx, i; + int err; + + err = get_user(avail_idx, &vrh->vring.avail->idx); + if (err) + return err; + + if (vrh->last_avail_idx == avail_idx) + return 0; + + /* Only get avail ring entries after they have been exposed by guest. */ + virtio_rmb(vrh->weak_barriers); + + i = vrh->last_avail_idx & (vrh->vring.num - 1); + + err = get_user(*head, &vrh->vring.avail->ring[i]); + if (err) + return err; + + vrh->last_avail_idx++; + return 1; +} + +static int parallel_test(u64 features, + bool (*getrange)(struct vringh *vrh, + u64 addr, struct vringh_range *r), + bool fast_vringh) +{ + void *host_map, *guest_map; + int fd, mapsize, to_guest[2], to_host[2]; + unsigned long xfers = 0, notifies = 0, receives = 0; + unsigned int first_cpu, last_cpu; + cpu_set_t cpu_set; + char buf[128]; + + /* Create real file to mmap. 
*/ + fd = open("/tmp/vringh_test-file", O_RDWR|O_CREAT|O_TRUNC, 0600); + if (fd < 0) + err(1, "Opening /tmp/vringh_test-file"); + + /* Extra room at the end for some data, and indirects */ + mapsize = vring_size(RINGSIZE, ALIGN) + + RINGSIZE * 2 * sizeof(int) + + RINGSIZE * 6 * sizeof(struct vring_desc); + mapsize = (mapsize + getpagesize() - 1) & ~(getpagesize() - 1); + ftruncate(fd, mapsize); + + /* Parent and child use separate addresses, to check our mapping logic! */ + host_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + guest_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + + pipe(to_guest); + pipe(to_host); + + CPU_ZERO(&cpu_set); + find_cpus(&first_cpu, &last_cpu); + printf("Using CPUS %u and %u\n", first_cpu, last_cpu); + fflush(stdout); + + if (fork() != 0) { + struct vringh vrh; + int status, err, rlen = 0; + char rbuf[5]; + + /* We are the host: never access guest addresses! */ + munmap(guest_map, mapsize); + + __user_addr_min = host_map; + __user_addr_max = __user_addr_min + mapsize; + user_addr_offset = host_map - guest_map; + assert(user_addr_offset); + + close(to_guest[0]); + close(to_host[1]); + + vring_init(&vrh.vring, RINGSIZE, host_map, ALIGN); + vringh_init_user(&vrh, features, RINGSIZE, true, + vrh.vring.desc, vrh.vring.avail, vrh.vring.used); + CPU_SET(first_cpu, &cpu_set); + if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set)) + errx(1, "Could not set affinity to cpu %u", first_cpu); + + while (xfers < NUM_XFERS) { + struct iovec host_riov[2], host_wiov[2]; + struct vringh_iov riov, wiov; + u16 head, written; + + if (fast_vringh) { + for (;;) { + err = vringh_get_head(&vrh, &head); + if (err != 0) + break; + err = vringh_need_notify_user(&vrh); + if (err < 0) + errx(1, "vringh_need_notify_user: %i", + err); + if (err) { + write(to_guest[1], "", 1); + notifies++; + } + } + if (err != 1) + errx(1, "vringh_get_head"); + written = 0; + goto complete; + } else { + vringh_iov_init(&riov, + host_riov, + ARRAY_SIZE(host_riov)); + vringh_iov_init(&wiov, + host_wiov, + ARRAY_SIZE(host_wiov)); + + err = vringh_getdesc_user(&vrh, &riov, &wiov, + getrange, &head); + } + if (err == 0) { + err = vringh_need_notify_user(&vrh); + if (err < 0) + errx(1, "vringh_need_notify_user: %i", + err); + if (err) { + write(to_guest[1], "", 1); + notifies++; + } + + if (!vringh_notify_enable_user(&vrh)) + continue; + + /* Swallow all notifies at once. */ + if (read(to_host[0], buf, sizeof(buf)) < 1) + break; + + vringh_notify_disable_user(&vrh); + receives++; + continue; + } + if (err != 1) + errx(1, "vringh_getdesc_user: %i", err); + + /* We simply copy bytes. 
*/ + if (riov.used) { + rlen = vringh_iov_pull_user(&riov, rbuf, + sizeof(rbuf)); + if (rlen != 4) + errx(1, "vringh_iov_pull_user: %i", + rlen); + assert(riov.i == riov.used); + written = 0; + } else { + err = vringh_iov_push_user(&wiov, rbuf, rlen); + if (err != rlen) + errx(1, "vringh_iov_push_user: %i", + err); + assert(wiov.i == wiov.used); + written = err; + } + complete: + xfers++; + + err = vringh_complete_user(&vrh, head, written); + if (err != 0) + errx(1, "vringh_complete_user: %i", err); + } + + err = vringh_need_notify_user(&vrh); + if (err < 0) + errx(1, "vringh_need_notify_user: %i", err); + if (err) { + write(to_guest[1], "", 1); + notifies++; + } + wait(&status); + if (!WIFEXITED(status)) + errx(1, "Child died with signal %i?", WTERMSIG(status)); + if (WEXITSTATUS(status) != 0) + errx(1, "Child exited %i?", WEXITSTATUS(status)); + printf("Host: notified %lu, pinged %lu\n", notifies, receives); + return 0; + } else { + struct guest_virtio_device gvdev; + struct virtqueue *vq; + unsigned int *data; + struct vring_desc *indirects; + unsigned int finished = 0; + + /* We pass sg[]s pointing into here, but we need RINGSIZE+1 */ + data = guest_map + vring_size(RINGSIZE, ALIGN); + indirects = (void *)data + (RINGSIZE + 1) * 2 * sizeof(int); + + /* We are the guest. */ + munmap(host_map, mapsize); + + close(to_guest[1]); + close(to_host[0]); + + gvdev.vdev.features = features; + gvdev.to_host_fd = to_host[1]; + gvdev.notifies = 0; + + CPU_SET(first_cpu, &cpu_set); + if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set)) + err(1, "Could not set affinity to cpu %u", first_cpu); + + vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true, + false, guest_map, + fast_vringh ? no_notify_host + : parallel_notify_host, + never_callback_guest, "guest vq"); + + /* Don't kfree indirects. */ + __kfree_ignore_start = indirects; + __kfree_ignore_end = indirects + RINGSIZE * 6; + + while (xfers < NUM_XFERS) { + struct scatterlist sg[4]; + unsigned int num_sg, len; + int *dbuf, err; + bool output = !(xfers % 2); + + /* Consume bufs. */ + while ((dbuf = virtqueue_get_buf(vq, &len)) != NULL) { + if (len == 4) + assert(*dbuf == finished - 1); + else if (!fast_vringh) + assert(*dbuf == finished); + finished++; + } + + /* Produce a buffer. */ + dbuf = data + (xfers % (RINGSIZE + 1)); + + if (output) + *dbuf = xfers; + else + *dbuf = -1; + + switch ((xfers / sizeof(*dbuf)) % 4) { + case 0: + /* Nasty three-element sg list. */ + sg_init_table(sg, num_sg = 3); + sg_set_buf(&sg[0], (void *)dbuf, 1); + sg_set_buf(&sg[1], (void *)dbuf + 1, 2); + sg_set_buf(&sg[2], (void *)dbuf + 3, 1); + break; + case 1: + sg_init_table(sg, num_sg = 2); + sg_set_buf(&sg[0], (void *)dbuf, 1); + sg_set_buf(&sg[1], (void *)dbuf + 1, 3); + break; + case 2: + sg_init_table(sg, num_sg = 1); + sg_set_buf(&sg[0], (void *)dbuf, 4); + break; + case 3: + sg_init_table(sg, num_sg = 4); + sg_set_buf(&sg[0], (void *)dbuf, 1); + sg_set_buf(&sg[1], (void *)dbuf + 1, 1); + sg_set_buf(&sg[2], (void *)dbuf + 2, 1); + sg_set_buf(&sg[3], (void *)dbuf + 3, 1); + break; + } + + /* May allocate an indirect, so force it to allocate + * user addr */ + __kmalloc_fake = indirects + (xfers % RINGSIZE) * 4; + if (output) + err = virtqueue_add_outbuf(vq, sg, num_sg, dbuf, + GFP_KERNEL); + else + err = virtqueue_add_inbuf(vq, sg, num_sg, + dbuf, GFP_KERNEL); + + if (err == -ENOSPC) { + if (!virtqueue_enable_cb_delayed(vq)) + continue; + /* Swallow all notifies at once. 
*/
+			if (read(to_guest[0], buf, sizeof(buf)) < 1)
+				break;
+
+			receives++;
+			virtqueue_disable_cb(vq);
+			continue;
+			}
+
+			if (err)
+				errx(1, "virtqueue_add_in/outbuf: %i", err);
+
+			xfers++;
+			virtqueue_kick(vq);
+		}
+
+		/* Any extra? */
+		while (finished != xfers) {
+			int *dbuf;
+			unsigned int len;
+
+			/* Consume bufs. */
+			dbuf = virtqueue_get_buf(vq, &len);
+			if (dbuf) {
+				if (len == 4)
+					assert(*dbuf == finished - 1);
+				else
+					assert(len == 0);
+				finished++;
+				continue;
+			}
+
+			if (!virtqueue_enable_cb_delayed(vq))
+				continue;
+			if (read(to_guest[0], buf, sizeof(buf)) < 1)
+				break;
+
+			receives++;
+			virtqueue_disable_cb(vq);
+		}
+
+		printf("Guest: notified %lu, pinged %lu\n",
+		       gvdev.notifies, receives);
+		vring_del_virtqueue(vq);
+		return 0;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct virtio_device vdev;
+	struct virtqueue *vq;
+	struct vringh vrh;
+	struct scatterlist guest_sg[RINGSIZE], *sgs[2];
+	struct iovec host_riov[2], host_wiov[2];
+	struct vringh_iov riov, wiov;
+	struct vring_used_elem used[RINGSIZE];
+	char buf[28];
+	u16 head;
+	int err;
+	unsigned i;
+	void *ret;
+	bool (*getrange)(struct vringh *vrh, u64 addr, struct vringh_range *r);
+	bool fast_vringh = false, parallel = false;
+
+	getrange = getrange_iov;
+	vdev.features = 0;
+
+	while (argv[1]) {
+		if (strcmp(argv[1], "--indirect") == 0)
+			__virtio_set_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
+		else if (strcmp(argv[1], "--eventidx") == 0)
+			__virtio_set_bit(&vdev, VIRTIO_RING_F_EVENT_IDX);
+		else if (strcmp(argv[1], "--virtio-1") == 0)
+			__virtio_set_bit(&vdev, VIRTIO_F_VERSION_1);
+		else if (strcmp(argv[1], "--slow-range") == 0)
+			getrange = getrange_slow;
+		else if (strcmp(argv[1], "--fast-vringh") == 0)
+			fast_vringh = true;
+		else if (strcmp(argv[1], "--parallel") == 0)
+			parallel = true;
+		else
+			errx(1, "Unknown arg %s", argv[1]);
+		argv++;
+	}
+
+	if (parallel)
+		return parallel_test(vdev.features, getrange, fast_vringh);
+
+	if (posix_memalign(&__user_addr_min, PAGE_SIZE, USER_MEM) != 0)
+		abort();
+	__user_addr_max = __user_addr_min + USER_MEM;
+	memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));
+
+	/* Set up guest side. */
+	vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, false,
+				 __user_addr_min,
+				 never_notify_host, never_callback_guest,
+				 "guest vq");
+
+	/* Set up host side. */
+	vring_init(&vrh.vring, RINGSIZE, __user_addr_min, ALIGN);
+	vringh_init_user(&vrh, vdev.features, RINGSIZE, true,
+			 vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
+
+	/* No descriptor to get yet... */
+	err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+	if (err != 0)
+		errx(1, "vringh_getdesc_user: %i", err);
+
+	/* Guest puts in a descriptor. */
+	memcpy(__user_addr_max - 1, "a", 1);
+	sg_init_table(guest_sg, 1);
+	sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
+	sg_init_table(guest_sg+1, 1);
+	sg_set_buf(&guest_sg[1], __user_addr_max - 3, 2);
+	sgs[0] = &guest_sg[0];
+	sgs[1] = &guest_sg[1];
+
+	/* May allocate an indirect, so force it to allocate user addr */
+	__kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
+	err = virtqueue_add_sgs(vq, sgs, 1, 1, &err, GFP_KERNEL);
+	if (err)
+		errx(1, "virtqueue_add_sgs: %i", err);
+	__kmalloc_fake = NULL;
+
+	/* Host retrieves it.
*/ + vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov)); + vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov)); + + err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head); + if (err != 1) + errx(1, "vringh_getdesc_user: %i", err); + + assert(riov.used == 1); + assert(riov.iov[0].iov_base == __user_addr_max - 1); + assert(riov.iov[0].iov_len == 1); + if (getrange != getrange_slow) { + assert(wiov.used == 1); + assert(wiov.iov[0].iov_base == __user_addr_max - 3); + assert(wiov.iov[0].iov_len == 2); + } else { + assert(wiov.used == 2); + assert(wiov.iov[0].iov_base == __user_addr_max - 3); + assert(wiov.iov[0].iov_len == 1); + assert(wiov.iov[1].iov_base == __user_addr_max - 2); + assert(wiov.iov[1].iov_len == 1); + } + + err = vringh_iov_pull_user(&riov, buf, 5); + if (err != 1) + errx(1, "vringh_iov_pull_user: %i", err); + assert(buf[0] == 'a'); + assert(riov.i == 1); + assert(vringh_iov_pull_user(&riov, buf, 5) == 0); + + memcpy(buf, "bcdef", 5); + err = vringh_iov_push_user(&wiov, buf, 5); + if (err != 2) + errx(1, "vringh_iov_push_user: %i", err); + assert(memcmp(__user_addr_max - 3, "bc", 2) == 0); + assert(wiov.i == wiov.used); + assert(vringh_iov_push_user(&wiov, buf, 5) == 0); + + /* Host is done. */ + err = vringh_complete_user(&vrh, head, err); + if (err != 0) + errx(1, "vringh_complete_user: %i", err); + + /* Guest should see used token now. */ + __kfree_ignore_start = __user_addr_min + vring_size(RINGSIZE, ALIGN); + __kfree_ignore_end = __kfree_ignore_start + 1; + ret = virtqueue_get_buf(vq, &i); + if (ret != &err) + errx(1, "virtqueue_get_buf: %p", ret); + assert(i == 2); + + /* Guest puts in a huge descriptor. */ + sg_init_table(guest_sg, RINGSIZE); + for (i = 0; i < RINGSIZE; i++) { + sg_set_buf(&guest_sg[i], + __user_addr_max - USER_MEM/4, USER_MEM/4); + } + + /* Fill contents with recognisable garbage. */ + for (i = 0; i < USER_MEM/4; i++) + ((char *)__user_addr_max - USER_MEM/4)[i] = i; + + /* This will allocate an indirect, so force it to allocate user addr */ + __kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN); + err = virtqueue_add_outbuf(vq, guest_sg, RINGSIZE, &err, GFP_KERNEL); + if (err) + errx(1, "virtqueue_add_outbuf (large): %i", err); + __kmalloc_fake = NULL; + + /* Host picks it up (allocates new iov). */ + vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov)); + vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov)); + + err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head); + if (err != 1) + errx(1, "vringh_getdesc_user: %i", err); + + assert(riov.max_num & VRINGH_IOV_ALLOCATED); + assert(riov.iov != host_riov); + if (getrange != getrange_slow) + assert(riov.used == RINGSIZE); + else + assert(riov.used == RINGSIZE * USER_MEM/4); + + assert(!(wiov.max_num & VRINGH_IOV_ALLOCATED)); + assert(wiov.used == 0); + + /* Pull data back out (in odd chunks), should be as expected. */ + for (i = 0; i < RINGSIZE * USER_MEM/4; i += 3) { + err = vringh_iov_pull_user(&riov, buf, 3); + if (err != 3 && i + err != RINGSIZE * USER_MEM/4) + errx(1, "vringh_iov_pull_user large: %i", err); + assert(buf[0] == (char)i); + assert(err < 2 || buf[1] == (char)(i + 1)); + assert(err < 3 || buf[2] == (char)(i + 2)); + } + assert(riov.i == riov.used); + vringh_iov_cleanup(&riov); + vringh_iov_cleanup(&wiov); + + /* Complete using multi interface, just because we can. 
*/ + used[0].id = head; + used[0].len = 0; + err = vringh_complete_multi_user(&vrh, used, 1); + if (err) + errx(1, "vringh_complete_multi_user(1): %i", err); + + /* Free up those descriptors. */ + ret = virtqueue_get_buf(vq, &i); + if (ret != &err) + errx(1, "virtqueue_get_buf: %p", ret); + + /* Add lots of descriptors. */ + sg_init_table(guest_sg, 1); + sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1); + for (i = 0; i < RINGSIZE; i++) { + err = virtqueue_add_outbuf(vq, guest_sg, 1, &err, GFP_KERNEL); + if (err) + errx(1, "virtqueue_add_outbuf (multiple): %i", err); + } + + /* Now get many, and consume them all at once. */ + vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov)); + vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov)); + + for (i = 0; i < RINGSIZE; i++) { + err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head); + if (err != 1) + errx(1, "vringh_getdesc_user: %i", err); + used[i].id = head; + used[i].len = 0; + } + /* Make sure it wraps around ring, to test! */ + assert(vrh.vring.used->idx % RINGSIZE != 0); + err = vringh_complete_multi_user(&vrh, used, RINGSIZE); + if (err) + errx(1, "vringh_complete_multi_user: %i", err); + + /* Free those buffers. */ + for (i = 0; i < RINGSIZE; i++) { + unsigned len; + assert(virtqueue_get_buf(vq, &len) != NULL); + } + + /* Test weird (but legal!) indirect. */ + if (__virtio_test_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC)) { + char *data = __user_addr_max - USER_MEM/4; + struct vring_desc *d = __user_addr_max - USER_MEM/2; + struct vring vring; + + /* Force creation of direct, which we modify. */ + __virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC); + vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, + false, __user_addr_min, + never_notify_host, + never_callback_guest, + "guest vq"); + + sg_init_table(guest_sg, 4); + sg_set_buf(&guest_sg[0], d, sizeof(*d)*2); + sg_set_buf(&guest_sg[1], d + 2, sizeof(*d)*1); + sg_set_buf(&guest_sg[2], data + 6, 4); + sg_set_buf(&guest_sg[3], d + 3, sizeof(*d)*3); + + err = virtqueue_add_outbuf(vq, guest_sg, 4, &err, GFP_KERNEL); + if (err) + errx(1, "virtqueue_add_outbuf (indirect): %i", err); + + vring_init(&vring, RINGSIZE, __user_addr_min, ALIGN); + + /* They're used in order, but double-check... */ + assert(vring.desc[0].addr == (unsigned long)d); + assert(vring.desc[1].addr == (unsigned long)(d+2)); + assert(vring.desc[2].addr == (unsigned long)data + 6); + assert(vring.desc[3].addr == (unsigned long)(d+3)); + vring.desc[0].flags |= VRING_DESC_F_INDIRECT; + vring.desc[1].flags |= VRING_DESC_F_INDIRECT; + vring.desc[3].flags |= VRING_DESC_F_INDIRECT; + + /* First indirect */ + d[0].addr = (unsigned long)data; + d[0].len = 1; + d[0].flags = VRING_DESC_F_NEXT; + d[0].next = 1; + d[1].addr = (unsigned long)data + 1; + d[1].len = 2; + d[1].flags = 0; + + /* Second indirect */ + d[2].addr = (unsigned long)data + 3; + d[2].len = 3; + d[2].flags = 0; + + /* Third indirect */ + d[3].addr = (unsigned long)data + 10; + d[3].len = 5; + d[3].flags = VRING_DESC_F_NEXT; + d[3].next = 1; + d[4].addr = (unsigned long)data + 15; + d[4].len = 6; + d[4].flags = VRING_DESC_F_NEXT; + d[4].next = 2; + d[5].addr = (unsigned long)data + 21; + d[5].len = 7; + d[5].flags = 0; + + /* Host picks it up (allocates new iov). 
*/
+		vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+		vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+		err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+		if (err != 1)
+			errx(1, "vringh_getdesc_user: %i", err);
+
+		if (head != 0)
+			errx(1, "vringh_getdesc_user: head %i not 0", head);
+
+		assert(riov.max_num & VRINGH_IOV_ALLOCATED);
+		if (getrange != getrange_slow)
+			assert(riov.used == 7);
+		else
+			assert(riov.used == 28);
+		err = vringh_iov_pull_user(&riov, buf, 29);
+		assert(err == 28);
+
+		/* Data should be linear. */
+		for (i = 0; i < err; i++)
+			assert(buf[i] == i);
+		vringh_iov_cleanup(&riov);
+	}
+
+	/* Don't leak memory... */
+	vring_del_virtqueue(vq);
+	free(__user_addr_min);
+
+	return 0;
+}