summary | refs | log | tree | commit | diff | stats
path: root/kernel/bpf/memalloc.c
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 18:50:12 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 18:50:12 +0000
commit: 8665bd53f2f2e27e5511d90428cb3f60e6d0ce15 (patch)
tree: 8d58900dc0ebd4a3011f92c128d2fe45bc7c4bf2 /kernel/bpf/memalloc.c
parent: Adding debian version 6.7.12-1. (diff)
download: linux-8665bd53f2f2e27e5511d90428cb3f60e6d0ce15.tar.xz
download: linux-8665bd53f2f2e27e5511d90428cb3f60e6d0ce15.zip
Merging upstream version 6.8.9.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'kernel/bpf/memalloc.c')
-rw-r--r-- kernel/bpf/memalloc.c | 93
1 file changed, 81 insertions, 12 deletions
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index aa0fbf000a..550f02e2cb 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -121,6 +121,8 @@ struct bpf_mem_caches {
struct bpf_mem_cache cache[NUM_CACHES];
};
+static const u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
+
static struct llist_node notrace *__llist_del_first(struct llist_head *head)
{
struct llist_node *entry, *next;
@@ -462,11 +464,17 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c)
* consume ~ 11 Kbyte per cpu.
* Typical case will be between 11K and 116K closer to 11K.
* bpf progs can and should share bpf_mem_cache when possible.
+ *
+ * Percpu allocation is typically rare. To avoid potential unnecessary large
+ * memory consumption, set low_watermark = 1 and high_watermark = 3, resulting in c->batch = 1.
*/
static void init_refill_work(struct bpf_mem_cache *c)
{
init_irq_work(&c->refill_work, bpf_mem_refill);
- if (c->unit_size <= 256) {
+ if (c->percpu_size) {
+ c->low_watermark = 1;
+ c->high_watermark = 3;
+ } else if (c->unit_size <= 256) {
c->low_watermark = 32;
c->high_watermark = 96;
} else {
@@ -483,11 +491,16 @@ static void init_refill_work(struct bpf_mem_cache *c)
static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
{
- /* To avoid consuming memory assume that 1st run of bpf
- * prog won't be doing more than 4 map_update_elem from
- * irq disabled region
+ int cnt = 1;
+
+ /* To avoid consuming memory, for non-percpu allocation, assume that
+ * 1st run of bpf prog won't be doing more than 4 map_update_elem from
+ * irq disabled region if unit size is less than or equal to 256.
+ * For all other cases, let us just do one allocation.
*/
- alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
+ if (!c->percpu_size && c->unit_size <= 256)
+ cnt = 4;
+ alloc_bulk(c, cnt, cpu_to_node(cpu), false);
}
/* When size != 0 bpf_mem_cache for each cpu.
@@ -499,12 +512,14 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
*/
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
{
- static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
struct bpf_mem_caches *cc, __percpu *pcc;
struct bpf_mem_cache *c, __percpu *pc;
struct obj_cgroup *objcg = NULL;
int cpu, i, unit_size, percpu_size = 0;
+ if (percpu && size == 0)
+ return -EINVAL;
+
/* room for llist_node and per-cpu pointer */
if (percpu)
percpu_size = LLIST_NODE_SZ + sizeof(void *);
@@ -523,6 +538,8 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
if (memcg_bpf_enabled())
objcg = get_obj_cgroup_from_current();
#endif
+ ma->objcg = objcg;
+
for_each_possible_cpu(cpu) {
c = per_cpu_ptr(pc, cpu);
c->unit_size = unit_size;
@@ -542,6 +559,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
#ifdef CONFIG_MEMCG_KMEM
objcg = get_obj_cgroup_from_current();
#endif
+ ma->objcg = objcg;
for_each_possible_cpu(cpu) {
cc = per_cpu_ptr(pcc, cpu);
for (i = 0; i < NUM_CACHES; i++) {
@@ -560,6 +578,56 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
return 0;
}
+int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg)
+{
+ struct bpf_mem_caches __percpu *pcc;
+
+ pcc = __alloc_percpu_gfp(sizeof(struct bpf_mem_caches), 8, GFP_KERNEL);
+ if (!pcc)
+ return -ENOMEM;
+
+ ma->caches = pcc;
+ ma->objcg = objcg;
+ ma->percpu = true;
+ return 0;
+}
+
+int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size)
+{
+ struct bpf_mem_caches *cc, __percpu *pcc;
+ int cpu, i, unit_size, percpu_size;
+ struct obj_cgroup *objcg;
+ struct bpf_mem_cache *c;
+
+ i = bpf_mem_cache_idx(size);
+ if (i < 0)
+ return -EINVAL;
+
+ /* room for llist_node and per-cpu pointer */
+ percpu_size = LLIST_NODE_SZ + sizeof(void *);
+
+ unit_size = sizes[i];
+ objcg = ma->objcg;
+ pcc = ma->caches;
+
+ for_each_possible_cpu(cpu) {
+ cc = per_cpu_ptr(pcc, cpu);
+ c = &cc->cache[i];
+ if (c->unit_size)
+ break;
+
+ c->unit_size = unit_size;
+ c->objcg = objcg;
+ c->percpu_size = percpu_size;
+ c->tgt = c;
+
+ init_refill_work(c);
+ prefill_mem_cache(c, cpu);
+ }
+
+ return 0;
+}
+
static void drain_mem_cache(struct bpf_mem_cache *c)
{
bool percpu = !!c->percpu_size;
@@ -691,9 +759,8 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress);
rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
}
- /* objcg is the same across cpus */
- if (c->objcg)
- obj_cgroup_put(c->objcg);
+ if (ma->objcg)
+ obj_cgroup_put(ma->objcg);
destroy_mem_alloc(ma, rcu_in_progress);
}
if (ma->caches) {
@@ -709,8 +776,8 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
}
}
- if (c->objcg)
- obj_cgroup_put(c->objcg);
+ if (ma->objcg)
+ obj_cgroup_put(ma->objcg);
destroy_mem_alloc(ma, rcu_in_progress);
}
}
@@ -833,7 +900,9 @@ void notrace *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size)
if (!size)
return NULL;
- idx = bpf_mem_cache_idx(size + LLIST_NODE_SZ);
+ if (!ma->percpu)
+ size += LLIST_NODE_SZ;
+ idx = bpf_mem_cache_idx(size);
if (idx < 0)
return NULL;