/*- * BSD LICENSE * * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _RTE_VHOST_H_ #define _RTE_VHOST_H_ /** * @file * Interface to vhost-user */ #include #include #include #include #include #include #include #define RTE_VHOST_USER_CLIENT (1ULL << 0) #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) /** * Information relating to memory regions including offsets to * addresses in QEMUs memory file. */ struct rte_vhost_mem_region { uint64_t guest_phys_addr; uint64_t guest_user_addr; uint64_t host_user_addr; uint64_t size; void *mmap_addr; uint64_t mmap_size; int fd; }; /** * Memory structure includes region and mapping information. */ struct rte_vhost_memory { uint32_t nregions; struct rte_vhost_mem_region regions[0]; }; struct rte_vhost_vring { struct vring_desc *desc; struct vring_avail *avail; struct vring_used *used; uint64_t log_guest_addr; int callfd; int kickfd; uint16_t size; uint16_t last_avail_idx; uint16_t last_used_idx; }; /** * Device and vring operations. */ struct vhost_device_ops { int (*new_device)(int vid); /**< Add device. */ void (*destroy_device)(int vid); /**< Remove device. */ int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */ /** * Features could be changed after the feature negotiation. * For example, VHOST_F_LOG_ALL will be set/cleared at the * start/end of live migration, respectively. This callback * is used to inform the application on such change. */ int (*features_changed)(int vid, uint64_t features); int (*vhost_nvme_admin_passthrough)(int vid, void *cmd, void *cqe, void *buf); int (*vhost_nvme_set_cq_call)(int vid, uint16_t qid, int fd); int (*vhost_nvme_get_cap)(int vid, uint64_t *cap); int (*new_connection)(int vid); void (*destroy_connection)(int vid); int (*get_config)(int vid, uint8_t *config, uint32_t config_len); int (*set_config)(int vid, uint8_t *config, uint32_t offset, uint32_t len, uint32_t flags); void *reserved[2]; /**< Reserved for future extension */ }; /** * Convert guest physical address to host virtual address * * @param mem * the guest memory regions * @param gpa * the guest physical address for querying * @return * the host virtual address on success, 0 on failure */ static inline uint64_t __attribute__((always_inline)) rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa) { struct rte_vhost_mem_region *reg; uint32_t i; for (i = 0; i < mem->nregions; i++) { reg = &mem->regions[i]; if (gpa >= reg->guest_phys_addr && gpa < reg->guest_phys_addr + reg->size) { return gpa - reg->guest_phys_addr + reg->host_user_addr; } } return 0; } /** * Convert guest physical address to host virtual address safely * * This variant of rte_vhost_gpa_to_vva() takes care all the * requested length is mapped and contiguous in process address * space. * * @param mem * the guest memory regions * @param gpa * the guest physical address for querying * @param len * the size of the requested area to map, * updated with actual size mapped * @return * the host virtual address on success, 0 on failure */ static inline uint64_t rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem, uint64_t gpa, uint64_t *len) { struct rte_vhost_mem_region *r; uint32_t i; for (i = 0; i < mem->nregions; i++) { r = &mem->regions[i]; if (gpa >= r->guest_phys_addr && gpa < r->guest_phys_addr + r->size) { if (unlikely(*len > r->guest_phys_addr + r->size - gpa)) *len = r->guest_phys_addr + r->size - gpa; return gpa - r->guest_phys_addr + r->host_user_addr; } } *len = 0; return 0; } #define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL)) /** * Log the memory write start with given address. * * This function only need be invoked when the live migration starts. * Therefore, we won't need call it at all in the most of time. For * making the performance impact be minimum, it's suggested to do a * check before calling it: * * if (unlikely(RTE_VHOST_NEED_LOG(features))) * rte_vhost_log_write(vid, addr, len); * * @param vid * vhost device ID * @param addr * the starting address for write * @param len * the length to write */ void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len); /** * Log the used ring update start at given offset. * * Same as rte_vhost_log_write, it's suggested to do a check before * calling it: * * if (unlikely(RTE_VHOST_NEED_LOG(features))) * rte_vhost_log_used_vring(vid, vring_idx, offset, len); * * @param vid * vhost device ID * @param vring_idx * the vring index * @param offset * the offset inside the used ring * @param len * the length to write */ void rte_vhost_log_used_vring(int vid, uint16_t vring_idx, uint64_t offset, uint64_t len); int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable); /** * Register vhost driver. path could be different for multiple * instance support. */ int rte_vhost_driver_register(const char *path, uint64_t flags); /* Unregister vhost driver. This is only meaningful to vhost user. */ int rte_vhost_driver_unregister(const char *path); /** * Set the feature bits the vhost-user driver supports. * * @param path * The vhost-user socket file path * @return * 0 on success, -1 on failure */ int rte_vhost_driver_set_features(const char *path, uint64_t features); /** * Enable vhost-user driver features. * * Note that * - the param @features should be a subset of the feature bits provided * by rte_vhost_driver_set_features(). * - it must be invoked before vhost-user negotiation starts. * * @param path * The vhost-user socket file path * @param features * Features to enable * @return * 0 on success, -1 on failure */ int rte_vhost_driver_enable_features(const char *path, uint64_t features); /** * Disable vhost-user driver features. * * The two notes at rte_vhost_driver_enable_features() also apply here. * * @param path * The vhost-user socket file path * @param features * Features to disable * @return * 0 on success, -1 on failure */ int rte_vhost_driver_disable_features(const char *path, uint64_t features); /** * Get the feature bits before feature negotiation. * * @param path * The vhost-user socket file path * @param features * A pointer to store the queried feature bits * @return * 0 on success, -1 on failure */ int rte_vhost_driver_get_features(const char *path, uint64_t *features); /** * Get the feature bits after negotiation * * @param vid * Vhost device ID * @param features * A pointer to store the queried feature bits * @return * 0 on success, -1 on failure */ int rte_vhost_get_negotiated_features(int vid, uint64_t *features); /* Register callbacks. */ int rte_vhost_driver_callback_register(const char *path, struct vhost_device_ops const * const ops); /** * * Start the vhost-user driver. * * This function triggers the vhost-user negotiation. * * @param path * The vhost-user socket file path * @return * 0 on success, -1 on failure */ int rte_vhost_driver_start(const char *path); /** * Get the MTU value of the device if set in QEMU. * * @param vid * virtio-net device ID * @param mtu * The variable to store the MTU value * * @return * 0: success * -EAGAIN: device not yet started * -ENOTSUP: device does not support MTU feature */ int rte_vhost_get_mtu(int vid, uint16_t *mtu); /** * Get the numa node from which the virtio net device's memory * is allocated. * * @param vid * vhost device ID * * @return * The numa node, -1 on failure */ int rte_vhost_get_numa_node(int vid); /** * Get the virtio net device's ifname, which is the vhost-user socket * file path. * * @param vid * vhost device ID * @param buf * The buffer to stored the queried ifname * @param len * The length of buf * * @return * 0 on success, -1 on failure */ int rte_vhost_get_ifname(int vid, char *buf, size_t len); /** * Get how many avail entries are left in the queue * * @param vid * vhost device ID * @param queue_id * virtio queue index * * @return * num of avail entires left */ uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id); struct rte_mbuf; struct rte_mempool; /** * This function adds buffers to the virtio devices RX virtqueue. Buffers can * be received from the physical port or from another virtual device. A packet * count is returned to indicate the number of packets that were succesfully * added to the RX queue. * @param vid * vhost device ID * @param queue_id * virtio queue index in mq case * @param pkts * array to contain packets to be enqueued * @param count * packets num to be enqueued * @return * num of packets enqueued */ uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id, struct rte_mbuf **pkts, uint16_t count); /** * This function gets guest buffers from the virtio device TX virtqueue, * construct host mbufs, copies guest buffer content to host mbufs and * store them in pkts to be processed. * @param vid * vhost device ID * @param queue_id * virtio queue index in mq case * @param mbuf_pool * mbuf_pool where host mbuf is allocated. * @param pkts * array to contain packets to be dequeued * @param count * packets num to be dequeued * @return * num of packets dequeued */ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count); /** * Get guest mem table: a list of memory regions. * * An rte_vhost_vhost_memory object will be allocated internaly, to hold the * guest memory regions. Application should free it at destroy_device() * callback. * * @param vid * vhost device ID * @param mem * To store the returned mem regions * @return * 0 on success, -1 on failure */ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); /** * Get guest vring info, including the vring address, vring size, etc. * * @param vid * vhost device ID * @param vring_idx * vring index * @param vring * the structure to hold the requested vring info * @return * 0 on success, -1 on failure */ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, struct rte_vhost_vring *vring); /** * Set id of the last descriptors in avail and used guest vrings. * * In case user application operates directly on buffers, it should use this * function on device destruction to retrieve the same values later on in device * creation via rte_vhost_get_vhost_vring(int, uint16_t, struct rte_vhost_vring *) * * @param vid * vhost device ID * @param vring_idx * vring index * @param last_avail_idx * id of the last descriptor in avail ring to be set * @param last_used_idx * id of the last descriptor in used ring to be set * @return * 0 on success, -1 on failure */ int rte_vhost_set_vhost_vring_last_idx(int vid, uint16_t vring_idx, uint16_t last_avail_idx, uint16_t last_used_idx); #endif /* _RTE_VHOST_H_ */