diff options
Diffstat (limited to '')
-rw-r--r-- | kernel/bpf/helpers.c | 751 |
1 files changed, 751 insertions, 0 deletions
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c new file mode 100644 index 000000000..0efe7c7bf --- /dev/null +++ b/kernel/bpf/helpers.c @@ -0,0 +1,751 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + */ +#include <linux/bpf.h> +#include <linux/rcupdate.h> +#include <linux/random.h> +#include <linux/smp.h> +#include <linux/topology.h> +#include <linux/ktime.h> +#include <linux/sched.h> +#include <linux/uidgid.h> +#include <linux/filter.h> +#include <linux/ctype.h> +#include <linux/jiffies.h> +#include <linux/pid_namespace.h> +#include <linux/proc_ns.h> +#include <linux/security.h> + +#include "../../lib/kstrtox.h" + +/* If kernel subsystem is allowing eBPF programs to call this function, + * inside its own verifier_ops->get_func_proto() callback it should return + * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments + * + * Different map implementations will rely on rcu in map methods + * lookup/update/delete, therefore eBPF programs must run under rcu lock + * if program is allowed to access maps, so check rcu_read_lock_held in + * all three functions. + */ +BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return (unsigned long) map->ops->map_lookup_elem(map, key); +} + +const struct bpf_func_proto bpf_map_lookup_elem_proto = { + .func = bpf_map_lookup_elem, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, +}; + +BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, + void *, value, u64, flags) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return map->ops->map_update_elem(map, key, value, flags); +} + +const struct bpf_func_proto bpf_map_update_elem_proto = { + .func = bpf_map_update_elem, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, + .arg3_type = ARG_PTR_TO_MAP_VALUE, + .arg4_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return map->ops->map_delete_elem(map, key); +} + +const struct bpf_func_proto bpf_map_delete_elem_proto = { + .func = bpf_map_delete_elem, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, +}; + +BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags) +{ + return map->ops->map_push_elem(map, value, flags); +} + +const struct bpf_func_proto bpf_map_push_elem_proto = { + .func = bpf_map_push_elem, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_VALUE, + .arg3_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) +{ + return map->ops->map_pop_elem(map, value); +} + +const struct bpf_func_proto bpf_map_pop_elem_proto = { + .func = bpf_map_pop_elem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, +}; + +BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) +{ + return map->ops->map_peek_elem(map, value); +} + +const struct bpf_func_proto bpf_map_peek_elem_proto = { + .func = bpf_map_peek_elem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, +}; + +const struct bpf_func_proto bpf_get_prandom_u32_proto = { + .func = bpf_user_rnd_u32, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_0(bpf_get_smp_processor_id) +{ + return smp_processor_id(); +} + +const struct bpf_func_proto bpf_get_smp_processor_id_proto = { + .func = bpf_get_smp_processor_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_0(bpf_get_numa_node_id) +{ + return numa_node_id(); +} + +const struct bpf_func_proto bpf_get_numa_node_id_proto = { + .func = bpf_get_numa_node_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_0(bpf_ktime_get_ns) +{ + /* NMI safe access to clock monotonic */ + return ktime_get_mono_fast_ns(); +} + +const struct bpf_func_proto bpf_ktime_get_ns_proto = { + .func = bpf_ktime_get_ns, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_0(bpf_ktime_get_boot_ns) +{ + /* NMI safe access to clock boottime */ + return ktime_get_boot_fast_ns(); +} + +const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = { + .func = bpf_ktime_get_boot_ns, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_0(bpf_get_current_pid_tgid) +{ + struct task_struct *task = current; + + if (unlikely(!task)) + return -EINVAL; + + return (u64) task->tgid << 32 | task->pid; +} + +const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { + .func = bpf_get_current_pid_tgid, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_0(bpf_get_current_uid_gid) +{ + struct task_struct *task = current; + kuid_t uid; + kgid_t gid; + + if (unlikely(!task)) + return -EINVAL; + + current_uid_gid(&uid, &gid); + return (u64) from_kgid(&init_user_ns, gid) << 32 | + from_kuid(&init_user_ns, uid); +} + +const struct bpf_func_proto bpf_get_current_uid_gid_proto = { + .func = bpf_get_current_uid_gid, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) +{ + struct task_struct *task = current; + + if (unlikely(!task)) + goto err_clear; + + strncpy(buf, task->comm, size); + + /* Verifier guarantees that size > 0. For task->comm exceeding + * size, guarantee that buf is %NUL-terminated. Unconditionally + * done here to save the size test. + */ + buf[size - 1] = 0; + return 0; +err_clear: + memset(buf, 0, size); + return -EINVAL; +} + +const struct bpf_func_proto bpf_get_current_comm_proto = { + .func = bpf_get_current_comm, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE, +}; + +#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK) + +static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) +{ + arch_spinlock_t *l = (void *)lock; + union { + __u32 val; + arch_spinlock_t lock; + } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; + + compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); + BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); + BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); + arch_spin_lock(l); +} + +static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) +{ + arch_spinlock_t *l = (void *)lock; + + arch_spin_unlock(l); +} + +#else + +static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) +{ + atomic_t *l = (void *)lock; + + BUILD_BUG_ON(sizeof(*l) != sizeof(*lock)); + do { + atomic_cond_read_relaxed(l, !VAL); + } while (atomic_xchg(l, 1)); +} + +static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) +{ + atomic_t *l = (void *)lock; + + atomic_set_release(l, 0); +} + +#endif + +static DEFINE_PER_CPU(unsigned long, irqsave_flags); + +notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +{ + unsigned long flags; + + local_irq_save(flags); + __bpf_spin_lock(lock); + __this_cpu_write(irqsave_flags, flags); + return 0; +} + +const struct bpf_func_proto bpf_spin_lock_proto = { + .func = bpf_spin_lock, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_SPIN_LOCK, +}; + +notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +{ + unsigned long flags; + + flags = __this_cpu_read(irqsave_flags); + __bpf_spin_unlock(lock); + local_irq_restore(flags); + return 0; +} + +const struct bpf_func_proto bpf_spin_unlock_proto = { + .func = bpf_spin_unlock, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_SPIN_LOCK, +}; + +void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, + bool lock_src) +{ + struct bpf_spin_lock *lock; + + if (lock_src) + lock = src + map->spin_lock_off; + else + lock = dst + map->spin_lock_off; + preempt_disable(); + ____bpf_spin_lock(lock); + copy_map_value(map, dst, src); + ____bpf_spin_unlock(lock); + preempt_enable(); +} + +BPF_CALL_0(bpf_jiffies64) +{ + return get_jiffies_64(); +} + +const struct bpf_func_proto bpf_jiffies64_proto = { + .func = bpf_jiffies64, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +#ifdef CONFIG_CGROUPS +BPF_CALL_0(bpf_get_current_cgroup_id) +{ + struct cgroup *cgrp = task_dfl_cgroup(current); + + return cgroup_id(cgrp); +} + +const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { + .func = bpf_get_current_cgroup_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level) +{ + struct cgroup *cgrp = task_dfl_cgroup(current); + struct cgroup *ancestor; + + ancestor = cgroup_ancestor(cgrp, ancestor_level); + if (!ancestor) + return 0; + return cgroup_id(ancestor); +} + +const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { + .func = bpf_get_current_ancestor_cgroup_id, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, +}; + +#ifdef CONFIG_CGROUP_BPF +DECLARE_PER_CPU(struct bpf_cgroup_storage_info, + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); + +BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) +{ + /* flags argument is not used now, + * but provides an ability to extend the API. + * verifier checks that its value is correct. + */ + enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); + struct bpf_cgroup_storage *storage = NULL; + void *ptr; + int i; + + for (i = BPF_CGROUP_STORAGE_NEST_MAX - 1; i >= 0; i--) { + if (likely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) + continue; + + storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); + break; + } + + if (stype == BPF_CGROUP_STORAGE_SHARED) + ptr = &READ_ONCE(storage->buf)->data[0]; + else + ptr = this_cpu_ptr(storage->percpu_buf); + + return (unsigned long)ptr; +} + +const struct bpf_func_proto bpf_get_local_storage_proto = { + .func = bpf_get_local_storage, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; +#endif + +#define BPF_STRTOX_BASE_MASK 0x1F + +static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags, + unsigned long long *res, bool *is_negative) +{ + unsigned int base = flags & BPF_STRTOX_BASE_MASK; + const char *cur_buf = buf; + size_t cur_len = buf_len; + unsigned int consumed; + size_t val_len; + char str[64]; + + if (!buf || !buf_len || !res || !is_negative) + return -EINVAL; + + if (base != 0 && base != 8 && base != 10 && base != 16) + return -EINVAL; + + if (flags & ~BPF_STRTOX_BASE_MASK) + return -EINVAL; + + while (cur_buf < buf + buf_len && isspace(*cur_buf)) + ++cur_buf; + + *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-'); + if (*is_negative) + ++cur_buf; + + consumed = cur_buf - buf; + cur_len -= consumed; + if (!cur_len) + return -EINVAL; + + cur_len = min(cur_len, sizeof(str) - 1); + memcpy(str, cur_buf, cur_len); + str[cur_len] = '\0'; + cur_buf = str; + + cur_buf = _parse_integer_fixup_radix(cur_buf, &base); + val_len = _parse_integer(cur_buf, base, res); + + if (val_len & KSTRTOX_OVERFLOW) + return -ERANGE; + + if (val_len == 0) + return -EINVAL; + + cur_buf += val_len; + consumed += cur_buf - str; + + return consumed; +} + +static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, + long long *res) +{ + unsigned long long _res; + bool is_negative; + int err; + + err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); + if (err < 0) + return err; + if (is_negative) { + if ((long long)-_res > 0) + return -ERANGE; + *res = -_res; + } else { + if ((long long)_res < 0) + return -ERANGE; + *res = _res; + } + return err; +} + +BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, + long *, res) +{ + long long _res; + int err; + + err = __bpf_strtoll(buf, buf_len, flags, &_res); + if (err < 0) + return err; + if (_res != (long)_res) + return -ERANGE; + *res = _res; + return err; +} + +const struct bpf_func_proto bpf_strtol_proto = { + .func = bpf_strtol, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, +}; + +BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, + unsigned long *, res) +{ + unsigned long long _res; + bool is_negative; + int err; + + err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); + if (err < 0) + return err; + if (is_negative) + return -EINVAL; + if (_res != (unsigned long)_res) + return -ERANGE; + *res = _res; + return err; +} + +const struct bpf_func_proto bpf_strtoul_proto = { + .func = bpf_strtoul, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, +}; +#endif + +BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, + struct bpf_pidns_info *, nsdata, u32, size) +{ + struct task_struct *task = current; + struct pid_namespace *pidns; + int err = -EINVAL; + + if (unlikely(size != sizeof(struct bpf_pidns_info))) + goto clear; + + if (unlikely((u64)(dev_t)dev != dev)) + goto clear; + + if (unlikely(!task)) + goto clear; + + pidns = task_active_pid_ns(task); + if (unlikely(!pidns)) { + err = -ENOENT; + goto clear; + } + + if (!ns_match(&pidns->ns, (dev_t)dev, ino)) + goto clear; + + nsdata->pid = task_pid_nr_ns(task, pidns); + nsdata->tgid = task_tgid_nr_ns(task, pidns); + return 0; +clear: + memset((void *)nsdata, 0, (size_t) size); + return err; +} + +const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { + .func = bpf_get_ns_current_pid_tgid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; + +static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { + .func = bpf_get_raw_cpu_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, + u64, flags, void *, data, u64, size) +{ + if (unlikely(flags & ~(BPF_F_INDEX_MASK))) + return -EINVAL; + + return bpf_event_output(map, flags, data, size, NULL, 0, NULL); +} + +const struct bpf_func_proto bpf_event_output_data_proto = { + .func = bpf_event_output_data, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, +}; + +BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size, + const void __user *, user_ptr) +{ + int ret = copy_from_user(dst, user_ptr, size); + + if (unlikely(ret)) { + memset(dst, 0, size); + ret = -EFAULT; + } + + return ret; +} + +const struct bpf_func_proto bpf_copy_from_user_proto = { + .func = bpf_copy_from_user, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) +{ + if (cpu >= nr_cpu_ids) + return (unsigned long)NULL; + + return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu); +} + +const struct bpf_func_proto bpf_per_cpu_ptr_proto = { + .func = bpf_per_cpu_ptr, + .gpl_only = false, + .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) +{ + return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr); +} + +const struct bpf_func_proto bpf_this_cpu_ptr_proto = { + .func = bpf_this_cpu_ptr, + .gpl_only = false, + .ret_type = RET_PTR_TO_MEM_OR_BTF_ID, + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, +}; + +const struct bpf_func_proto bpf_get_current_task_proto __weak; +const struct bpf_func_proto bpf_probe_read_user_proto __weak; +const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; +const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; +const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; + +const struct bpf_func_proto * +bpf_base_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_map_push_elem: + return &bpf_map_push_elem_proto; + case BPF_FUNC_map_pop_elem: + return &bpf_map_pop_elem_proto; + case BPF_FUNC_map_peek_elem: + return &bpf_map_peek_elem_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_raw_smp_processor_id_proto; + case BPF_FUNC_get_numa_node_id: + return &bpf_get_numa_node_id_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; + case BPF_FUNC_ktime_get_ns: + return &bpf_ktime_get_ns_proto; + case BPF_FUNC_ktime_get_boot_ns: + return &bpf_ktime_get_boot_ns_proto; + case BPF_FUNC_ringbuf_output: + return &bpf_ringbuf_output_proto; + case BPF_FUNC_ringbuf_reserve: + return &bpf_ringbuf_reserve_proto; + case BPF_FUNC_ringbuf_submit: + return &bpf_ringbuf_submit_proto; + case BPF_FUNC_ringbuf_discard: + return &bpf_ringbuf_discard_proto; + case BPF_FUNC_ringbuf_query: + return &bpf_ringbuf_query_proto; + default: + break; + } + + if (!bpf_capable()) + return NULL; + + switch (func_id) { + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; + case BPF_FUNC_jiffies64: + return &bpf_jiffies64_proto; + case BPF_FUNC_per_cpu_ptr: + return &bpf_per_cpu_ptr_proto; + case BPF_FUNC_this_cpu_ptr: + return &bpf_this_cpu_ptr_proto; + default: + break; + } + + if (!perfmon_capable()) + return NULL; + + switch (func_id) { + case BPF_FUNC_trace_printk: + return bpf_get_trace_printk_proto(); + case BPF_FUNC_get_current_task: + return &bpf_get_current_task_proto; + case BPF_FUNC_probe_read_user: + return &bpf_probe_read_user_proto; + case BPF_FUNC_probe_read_kernel: + return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + NULL : &bpf_probe_read_kernel_proto; + case BPF_FUNC_probe_read_user_str: + return &bpf_probe_read_user_str_proto; + case BPF_FUNC_probe_read_kernel_str: + return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + NULL : &bpf_probe_read_kernel_str_proto; + case BPF_FUNC_snprintf_btf: + return &bpf_snprintf_btf_proto; + default: + return NULL; + } +} |