summaryrefslogtreecommitdiffstats
path: root/tools/hv/vmbus_bufring.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/hv/vmbus_bufring.c')
-rw-r--r--tools/hv/vmbus_bufring.c318
1 files changed, 318 insertions, 0 deletions
diff --git a/tools/hv/vmbus_bufring.c b/tools/hv/vmbus_bufring.c
new file mode 100644
index 0000000000..bac32c1109
--- /dev/null
+++ b/tools/hv/vmbus_bufring.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2009-2012,2016,2023 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <emmintrin.h>
+#include <linux/limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include "vmbus_bufring.h"
+
+/**
+ * Compiler barrier.
+ *
+ * Guarantees that operation reordering does not occur at compile time
+ * for operations directly before and after the barrier.
+ */
+#define rte_compiler_barrier() ({ asm volatile ("" : : : "memory"); })
+
+#define VMBUS_RQST_ERROR 0xFFFFFFFFFFFFFFFF
+#define ALIGN(val, align) ((typeof(val))((val) & (~((typeof(val))((align) - 1)))))
+
+void *vmbus_uio_map(int *fd, int size)
+{
+ void *map;
+
+ map = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
+ if (map == MAP_FAILED)
+ return NULL;
+
+ return map;
+}
+
+/* Increase bufring index by inc with wraparound */
+static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
+{
+ idx += inc;
+ if (idx >= sz)
+ idx -= sz;
+
+ return idx;
+}
+
+void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
+{
+ br->vbr = buf;
+ br->windex = br->vbr->windex;
+ br->dsize = blen - sizeof(struct vmbus_bufring);
+}
+
+static inline __always_inline void
+rte_smp_mb(void)
+{
+ asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
+}
+
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+ uint8_t res;
+
+ asm volatile("lock ; "
+ "cmpxchgl %[src], %[dst];"
+ "sete %[res];"
+ : [res] "=a" (res), /* output */
+ [dst] "=m" (*dst)
+ : [src] "r" (src), /* input */
+ "a" (exp),
+ "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return res;
+}
+
+static inline uint32_t
+vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
+ const void *src0, uint32_t cplen)
+{
+ uint8_t *br_data = tbr->vbr->data;
+ uint32_t br_dsize = tbr->dsize;
+ const uint8_t *src = src0;
+
+ /* XXX use double mapping like Linux kernel? */
+ if (cplen > br_dsize - windex) {
+ uint32_t fraglen = br_dsize - windex;
+
+ /* Wrap-around detected */
+ memcpy(br_data + windex, src, fraglen);
+ memcpy(br_data, src + fraglen, cplen - fraglen);
+ } else {
+ memcpy(br_data + windex, src, cplen);
+ }
+
+ return vmbus_br_idxinc(windex, cplen, br_dsize);
+}
+
+/*
+ * Write scattered channel packet to TX bufring.
+ *
+ * The offset of this channel packet is written as a 64bits value
+ * immediately after this channel packet.
+ *
+ * The write goes through three stages:
+ * 1. Reserve space in ring buffer for the new data.
+ * Writer atomically moves priv_write_index.
+ * 2. Copy the new data into the ring.
+ * 3. Update the tail of the ring (visible to host) that indicates
+ * next read location. Writer updates write_index
+ */
+static int
+vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen)
+{
+ struct vmbus_bufring *vbr = tbr->vbr;
+ uint32_t ring_size = tbr->dsize;
+ uint32_t old_windex, next_windex, windex, total;
+ uint64_t save_windex;
+ int i;
+
+ total = 0;
+ for (i = 0; i < iovlen; i++)
+ total += iov[i].iov_len;
+ total += sizeof(save_windex);
+
+ /* Reserve space in ring */
+ do {
+ uint32_t avail;
+
+ /* Get current free location */
+ old_windex = tbr->windex;
+
+ /* Prevent compiler reordering this with calculation */
+ rte_compiler_barrier();
+
+ avail = vmbus_br_availwrite(tbr, old_windex);
+
+ /* If not enough space in ring, then tell caller. */
+ if (avail <= total)
+ return -EAGAIN;
+
+ next_windex = vmbus_br_idxinc(old_windex, total, ring_size);
+
+ /* Atomic update of next write_index for other threads */
+ } while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));
+
+ /* Space from old..new is now reserved */
+ windex = old_windex;
+ for (i = 0; i < iovlen; i++)
+ windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len);
+
+ /* Set the offset of the current channel packet. */
+ save_windex = ((uint64_t)old_windex) << 32;
+ windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
+ sizeof(save_windex));
+
+ /* The region reserved should match region used */
+ if (windex != next_windex)
+ return -EINVAL;
+
+ /* Ensure that data is available before updating host index */
+ rte_compiler_barrier();
+
+ /* Checkin for our reservation. wait for our turn to update host */
+ while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
+ _mm_pause();
+
+ return 0;
+}
+
+int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
+ uint32_t dlen, uint32_t flags)
+{
+ struct vmbus_chanpkt pkt;
+ unsigned int pktlen, pad_pktlen;
+ const uint32_t hlen = sizeof(pkt);
+ uint64_t pad = 0;
+ struct iovec iov[3];
+ int error;
+
+ pktlen = hlen + dlen;
+ pad_pktlen = ALIGN(pktlen, sizeof(uint64_t));
+
+ pkt.hdr.type = type;
+ pkt.hdr.flags = flags;
+ pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
+ pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
+ pkt.hdr.xactid = VMBUS_RQST_ERROR;
+
+ iov[0].iov_base = &pkt;
+ iov[0].iov_len = hlen;
+ iov[1].iov_base = data;
+ iov[1].iov_len = dlen;
+ iov[2].iov_base = &pad;
+ iov[2].iov_len = pad_pktlen - pktlen;
+
+ error = vmbus_txbr_write(txbr, iov, 3);
+
+ return error;
+}
+
+static inline uint32_t
+vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
+ void *dst0, size_t cplen)
+{
+ const uint8_t *br_data = rbr->vbr->data;
+ uint32_t br_dsize = rbr->dsize;
+ uint8_t *dst = dst0;
+
+ if (cplen > br_dsize - rindex) {
+ uint32_t fraglen = br_dsize - rindex;
+
+ /* Wrap-around detected. */
+ memcpy(dst, br_data + rindex, fraglen);
+ memcpy(dst + fraglen, br_data, cplen - fraglen);
+ } else {
+ memcpy(dst, br_data + rindex, cplen);
+ }
+
+ return vmbus_br_idxinc(rindex, cplen, br_dsize);
+}
+
+/* Copy data from receive ring but don't change index */
+static int
+vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
+{
+ uint32_t avail;
+
+ /*
+ * The requested data and the 64bits channel packet
+ * offset should be there at least.
+ */
+ avail = vmbus_br_availread(rbr);
+ if (avail < dlen + sizeof(uint64_t))
+ return -EAGAIN;
+
+ vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
+ return 0;
+}
+
+/*
+ * Copy data from receive ring and change index
+ * NOTE:
+ * We assume (dlen + skip) == sizeof(channel packet).
+ */
+static int
+vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
+{
+ struct vmbus_bufring *vbr = rbr->vbr;
+ uint32_t br_dsize = rbr->dsize;
+ uint32_t rindex;
+
+ if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
+ return -EAGAIN;
+
+ /* Record where host was when we started read (for debug) */
+ rbr->windex = rbr->vbr->windex;
+
+ /*
+ * Copy channel packet from RX bufring.
+ */
+ rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
+ rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);
+
+ /*
+ * Discard this channel packet's 64bits offset, which is useless to us.
+ */
+ rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);
+
+ /* Update the read index _after_ the channel packet is fetched. */
+ rte_compiler_barrier();
+
+ vbr->rindex = rindex;
+
+ return 0;
+}
+
+int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr,
+ void *data, uint32_t *len)
+{
+ struct vmbus_chanpkt_hdr pkt;
+ uint32_t dlen, bufferlen = *len;
+ int error;
+
+ error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt));
+ if (error)
+ return error;
+
+ if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN))
+ /* XXX this channel is dead actually. */
+ return -EIO;
+
+ if (unlikely(pkt.hlen > pkt.tlen))
+ return -EIO;
+
+ /* Length are in quad words */
+ dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
+ *len = dlen;
+
+ /* If caller buffer is not large enough */
+ if (unlikely(dlen > bufferlen))
+ return -ENOBUFS;
+
+ /* Read data and skip packet header */
+ error = vmbus_rxbr_read(rxbr, data, dlen, 0);
+ if (error)
+ return error;
+
+ /* Return the number of bytes read */
+ return dlen + sizeof(uint64_t);
+}