1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
|
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright 2023 Red Hat
*/
#ifndef UDS_CPU_H
#define UDS_CPU_H
#include <linux/cache.h>
/**
* uds_prefetch_address() - Minimize cache-miss latency by attempting to move data into a CPU cache
* before it is accessed.
*
* @address: the address to fetch (may be invalid)
* @for_write: must be constant at compile time--false if for reading, true if for writing
*/
static inline void uds_prefetch_address(const void *address, bool for_write)
{
/*
* for_write won't be a constant if we are compiled with optimization turned off, in which
* case prefetching really doesn't matter. clang can't figure out that if for_write is a
* constant, it can be passed as the second, mandatorily constant argument to prefetch(),
* at least currently on llvm 12.
*/
if (__builtin_constant_p(for_write)) {
if (for_write)
__builtin_prefetch(address, true);
else
__builtin_prefetch(address, false);
}
}
/**
* uds_prefetch_range() - Minimize cache-miss latency by attempting to move a range of addresses
* into a CPU cache before they are accessed.
*
* @start: the starting address to fetch (may be invalid)
* @size: the number of bytes in the address range
* @for_write: must be constant at compile time--false if for reading, true if for writing
*/
static inline void uds_prefetch_range(const void *start, unsigned int size,
bool for_write)
{
/*
* Count the number of cache lines to fetch, allowing for the address range to span an
* extra cache line boundary due to address alignment.
*/
const char *address = (const char *) start;
unsigned int offset = ((uintptr_t) address % L1_CACHE_BYTES);
unsigned int cache_lines = (1 + ((size + offset) / L1_CACHE_BYTES));
while (cache_lines-- > 0) {
uds_prefetch_address(address, for_write);
address += L1_CACHE_BYTES;
}
}
#endif /* UDS_CPU_H */
|