diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-07 13:13:35 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-07 13:13:35 +0000 |
commit | 9ea788653ae1a5cea3f3853c5c50b58c98b3cd67 (patch) | |
tree | e1bf3b779c1dc9c5337c4442bf720073a3a53415 /kernel/bpf/ringbuf.c | |
parent | Adding upstream version 6.9.7. (diff) | |
download | linux-9ea788653ae1a5cea3f3853c5c50b58c98b3cd67.tar.xz linux-9ea788653ae1a5cea3f3853c5c50b58c98b3cd67.zip |
Adding upstream version 6.9.8.upstream/6.9.8
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'kernel/bpf/ringbuf.c')
-rw-r--r-- | kernel/bpf/ringbuf.c | 31 |
1 files changed, 25 insertions, 6 deletions
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 0ee653a936..e20b90c361 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -51,7 +51,8 @@ struct bpf_ringbuf { * This prevents a user-space application from modifying the * position and ruining in-kernel tracking. The permissions of the * pages depend on who is producing samples: user-space or the - * kernel. + * kernel. Note that the pending counter is placed in the same + * page as the producer, so that it shares the same cache line. * * Kernel-producer * --------------- @@ -70,6 +71,7 @@ struct bpf_ringbuf { */ unsigned long consumer_pos __aligned(PAGE_SIZE); unsigned long producer_pos __aligned(PAGE_SIZE); + unsigned long pending_pos; char data[] __aligned(PAGE_SIZE); }; @@ -179,6 +181,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) rb->mask = data_sz - 1; rb->consumer_pos = 0; rb->producer_pos = 0; + rb->pending_pos = 0; return rb; } @@ -404,9 +407,9 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr) static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) { - unsigned long cons_pos, prod_pos, new_prod_pos, flags; - u32 len, pg_off; + unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags; struct bpf_ringbuf_hdr *hdr; + u32 len, pg_off, tmp_size, hdr_len; if (unlikely(size > RINGBUF_MAX_RECORD_SZ)) return NULL; @@ -424,13 +427,29 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) spin_lock_irqsave(&rb->spinlock, flags); } + pend_pos = rb->pending_pos; prod_pos = rb->producer_pos; new_prod_pos = prod_pos + len; - /* check for out of ringbuf space by ensuring producer position - * doesn't advance more than (ringbuf_size - 1) ahead + while (pend_pos < prod_pos) { + hdr = (void *)rb->data + (pend_pos & rb->mask); + hdr_len = READ_ONCE(hdr->len); + if (hdr_len & BPF_RINGBUF_BUSY_BIT) + break; + tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT; + tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8); + pend_pos += tmp_size; + } + rb->pending_pos = pend_pos; + + /* check for out of ringbuf space: + * - by ensuring producer position doesn't advance more than + * (ringbuf_size - 1) ahead + * - by ensuring oldest not yet committed record until newest + * record does not span more than (ringbuf_size - 1) */ - if (new_prod_pos - cons_pos > rb->mask) { + if (new_prod_pos - cons_pos > rb->mask || + new_prod_pos - pend_pos > rb->mask) { spin_unlock_irqrestore(&rb->spinlock, flags); return NULL; } |