// SPDX-License-Identifier: BSD-3-Clause
/*
 * Copyright (c) 2009-2012,2016,2023 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 */

#include <errno.h>
#include <fcntl.h>
#include <emmintrin.h>
#include <linux/limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <unistd.h>
#include "vmbus_bufring.h"

/**
 * Compiler barrier.
 *
 * Guarantees that operation reordering does not occur at compile time
 * for operations directly before and after the barrier.
 */
#define	rte_compiler_barrier()		({ asm volatile ("" : : : "memory"); })

#define VMBUS_RQST_ERROR	0xFFFFFFFFFFFFFFFF
/* Round val up to the next multiple of align (align must be a power of 2) */
#define ALIGN(val, align)	((typeof(val))(((val) + ((typeof(val))(align) - 1)) & ~((typeof(val))((align) - 1))))

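/*
 * Map the ring buffer region of a UIO device read/write.  The mapping
 * covers 2 * size bytes, presumably so one call covers both the TX and
 * RX rings laid out back to back.  Returns NULL on failure.
 */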
void *vmbus_uio_map(int *fd, int size)
{
	void *map;

	map = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
	if (map == MAP_FAILED)
		return NULL;

	return map;
}
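
/*
 * Minimal usage sketch (fd, ring_size, txbr and rxbr are illustrative,
 * not part of this file):
 *
 *	void *ring = vmbus_uio_map(&fd, ring_size);
 *	if (ring) {
 *		vmbus_br_setup(&txbr, ring, ring_size);
 *		vmbus_br_setup(&rxbr, (uint8_t *)ring + ring_size, ring_size);
 *	}
 */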

/* Increase bufring index by inc with wraparound */
static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
{
	idx += inc;
	if (idx >= sz)
		idx -= sz;

	return idx;
}

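/*
 * Attach a vmbus_br to an already-mapped ring: buf points at the
 * struct vmbus_bufring header, followed by blen minus the header size
 * bytes of ring data.  The current write index is also snapshotted.
 */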
void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
{
	br->vbr = buf;
	br->windex = br->vbr->windex;
	br->dsize = blen - sizeof(struct vmbus_bufring);
}

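/*
 * Full memory barrier.  A locked add of zero to a slot below the stack
 * pointer orders all earlier loads and stores before all later ones;
 * on x86 this is cheaper than mfence and does not modify any data.
 */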
static inline __always_inline void
rte_smp_mb(void)
{
	asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
}

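/*
 * Atomic compare-and-set: if *dst equals exp, store src into *dst.
 * Returns 1 when the swap happened, 0 otherwise (sete captures the
 * ZF result of the locked cmpxchg).
 */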
static inline int
rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
{
	uint8_t res;

	asm volatile("lock ; "
		     "cmpxchgl %[src], %[dst];"
		     "sete %[res];"
		     : [res] "=a" (res),     /* output */
		     [dst] "=m" (*dst)
		     : [src] "r" (src),      /* input */
		     "a" (exp),
		     "m" (*dst)
		     : "memory");            /* no-clobber list */
	return res;
}

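/*
 * Copy cplen bytes from src0 into the TX ring at offset windex,
 * splitting the copy in two when it wraps past the end of the ring
 * data area.  Returns the updated write index.
 */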
static inline uint32_t
vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
		  const void *src0, uint32_t cplen)
{
	uint8_t *br_data = tbr->vbr->data;
	uint32_t br_dsize = tbr->dsize;
	const uint8_t *src = src0;

	/* XXX use double mapping like Linux kernel? */
	if (cplen > br_dsize - windex) {
		uint32_t fraglen = br_dsize - windex;

		/* Wrap-around detected */
		memcpy(br_data + windex, src, fraglen);
		memcpy(br_data, src + fraglen, cplen - fraglen);
	} else {
		memcpy(br_data + windex, src, cplen);
	}

	return vmbus_br_idxinc(windex, cplen, br_dsize);
}

/*
 * Write a scattered channel packet to the TX bufring.
 *
 * The ring offset of this channel packet is written as a 64-bit value
 * immediately after the packet itself.
 *
 * The write goes through three stages:
 *  1. Reserve space in the ring buffer for the new data.
 *     The writer atomically advances the private write index (tbr->windex).
 *  2. Copy the new data into the ring.
 *  3. Publish the data to the host by updating the shared write index
 *     (vbr->windex), which tells the host where the new data ends.
 */
static int
vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen)
{
	struct vmbus_bufring *vbr = tbr->vbr;
	uint32_t ring_size = tbr->dsize;
	uint32_t old_windex, next_windex, windex, total;
	uint64_t save_windex;
	int i;

	total = 0;
	for (i = 0; i < iovlen; i++)
		total += iov[i].iov_len;
	total += sizeof(save_windex);

	/* Reserve space in ring */
	do {
		uint32_t avail;

		/* Get current free location */
		old_windex = tbr->windex;

		/* Prevent compiler reordering this with calculation */
		rte_compiler_barrier();

		avail = vmbus_br_availwrite(tbr, old_windex);

		/* If not enough space in ring, then tell caller. */
		if (avail <= total)
			return -EAGAIN;

		next_windex = vmbus_br_idxinc(old_windex, total, ring_size);

		/* Atomic update of next write_index for other threads */
	} while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));

	/* Space from old..new is now reserved */
	windex = old_windex;
	for (i = 0; i < iovlen; i++)
		windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len);

	/* Set the offset of the current channel packet. */
	save_windex = ((uint64_t)old_windex) << 32;
	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
				   sizeof(save_windex));

	/* The region reserved should match region used */
	if (windex != next_windex)
		return -EINVAL;

	/* Ensure that data is available before updating host index */
	rte_compiler_barrier();

	/* Check in our reservation; wait for our turn to update the host index */
	while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
		_mm_pause();

	return 0;
}

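/*
 * Build and send one channel packet: header, caller data, then zero
 * padding up to the next 8-byte boundary.  Hypothetical usage, assuming
 * the inband packet-type constant from vmbus_bufring.h and an
 * illustrative buffer:
 *
 *	err = rte_vmbus_chan_send(&txbr, VMBUS_CHANPKT_TYPE_INBAND,
 *				  buf, buflen, 0);
 *	if (err == -EAGAIN)
 *		... ring full, back off and retry ...
 */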
int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
			uint32_t dlen, uint32_t flags)
{
	struct vmbus_chanpkt pkt;
	unsigned int pktlen, pad_pktlen;
	const uint32_t hlen = sizeof(pkt);
	uint64_t pad = 0;
	struct iovec iov[3];
	int error;

	pktlen = hlen + dlen;
	pad_pktlen = ALIGN(pktlen, sizeof(uint64_t));

	pkt.hdr.type = type;
	pkt.hdr.flags = flags;
	pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
	pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
	pkt.hdr.xactid = VMBUS_RQST_ERROR;

	iov[0].iov_base = &pkt;
	iov[0].iov_len = hlen;
	iov[1].iov_base = data;
	iov[1].iov_len = dlen;
	iov[2].iov_base = &pad;
	iov[2].iov_len = pad_pktlen - pktlen;

	error = vmbus_txbr_write(txbr, iov, 3);

	return error;
}

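/*
 * Copy cplen bytes out of the RX ring starting at offset rindex,
 * splitting the copy in two when it wraps past the end of the ring
 * data area.  Returns the updated read index.
 */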
static inline uint32_t
vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
		    void *dst0, size_t cplen)
{
	const uint8_t *br_data = rbr->vbr->data;
	uint32_t br_dsize = rbr->dsize;
	uint8_t *dst = dst0;

	if (cplen > br_dsize - rindex) {
		uint32_t fraglen = br_dsize - rindex;

		/* Wrap-around detected. */
		memcpy(dst, br_data + rindex, fraglen);
		memcpy(dst + fraglen, br_data, cplen - fraglen);
	} else {
		memcpy(dst, br_data + rindex, cplen);
	}

	return vmbus_br_idxinc(rindex, cplen, br_dsize);
}

/* Copy data from receive ring but don't change index */
static int
vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
{
	uint32_t avail;

	/*
	 * At least the requested data plus the trailing 64-bit channel
	 * packet offset must be available.
	 */
	avail = vmbus_br_availread(rbr);
	if (avail < dlen + sizeof(uint64_t))
		return -EAGAIN;

	vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
	return 0;
}

/*
 * Copy data from receive ring and change index
 * NOTE:
 * We assume (dlen + skip) == sizeof(channel packet).
 */
static int
vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
{
	struct vmbus_bufring *vbr = rbr->vbr;
	uint32_t br_dsize = rbr->dsize;
	uint32_t rindex;

	if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
		return -EAGAIN;

	/* Record where host was when we started read (for debug) */
	rbr->windex = rbr->vbr->windex;

	/*
	 * Copy channel packet from RX bufring.
	 */
	rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
	rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);

	/*
	 * Skip this channel packet's trailing 64-bit offset; it is of
	 * no use to us.
	 */
	rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);

	/* Update the read index _after_ the channel packet is fetched. */
	rte_compiler_barrier();

	vbr->rindex = rindex;

	return 0;
}

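/*
 * Receive one raw channel packet, header included.  On entry *len holds
 * the capacity of the data buffer; it is updated to the packet length.
 * The return value is the number of bytes consumed from the ring (the
 * packet plus its trailing 64-bit offset), -EAGAIN when the ring is
 * empty, or -ENOBUFS when the caller's buffer is too small.
 */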
int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr,
			    void *data, uint32_t *len)
{
	struct vmbus_chanpkt_hdr pkt;
	uint32_t dlen, bufferlen = *len;
	int error;

	error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt));
	if (error)
		return error;

	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN))
		/* XXX this channel is actually dead. */
		return -EIO;

	if (unlikely(pkt.hlen > pkt.tlen))
		return -EIO;

	/* Lengths are in quadwords (8-byte units) */
	dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
	*len = dlen;

	/* If the caller's buffer is not large enough */
	if (unlikely(dlen > bufferlen))
		return -ENOBUFS;

	/* Read data and skip packet header */
	error = vmbus_rxbr_read(rxbr, data, dlen, 0);
	if (error)
		return error;

	/*
	 * Return the number of bytes consumed from the ring:
	 * the packet plus its trailing 64-bit offset.
	 */
	return dlen + sizeof(uint64_t);
}