summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/auditfilter.c5
-rw-r--r--kernel/bpf/lpm_trie.c13
-rw-r--r--kernel/bpf/syscall.c16
-rw-r--r--kernel/bpf/verifier.c53
-rw-r--r--kernel/cgroup/cpuset.c143
-rw-r--r--kernel/cpu.c48
-rw-r--r--kernel/debug/kdb/kdb_io.c99
-rw-r--r--kernel/dma/map_benchmark.c22
-rw-r--r--kernel/events/core.c13
-rw-r--r--kernel/gcov/gcc_4_7.c4
-rwxr-xr-xkernel/gen_kheaders.sh9
-rw-r--r--kernel/irq/cpuhotplug.c16
-rw-r--r--kernel/irq/irqdesc.c5
-rw-r--r--kernel/kcov.c1
-rw-r--r--kernel/kprobes.c6
-rw-r--r--kernel/padata.c8
-rw-r--r--kernel/pid_namespace.c1
-rw-r--r--kernel/power/process.c2
-rw-r--r--kernel/rcu/rcutorture.c16
-rw-r--r--kernel/rcu/tasks.h2
-rw-r--r--kernel/rcu/tree_stall.h3
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/sched/fair.c53
-rw-r--r--kernel/sched/topology.c2
-rw-r--r--kernel/time/clocksource.c42
-rw-r--r--kernel/time/tick-common.c42
-rw-r--r--kernel/trace/Kconfig4
-rw-r--r--kernel/trace/bpf_trace.c8
-rw-r--r--kernel/trace/ftrace.c40
-rw-r--r--kernel/trace/preemptirq_delay_test.c1
-rw-r--r--kernel/trace/ring_buffer.c9
-rw-r--r--kernel/trace/rv/rv.c2
-rw-r--r--kernel/trace/trace_events_user.c76
-rw-r--r--kernel/trace/trace_probe.c4
34 files changed, 437 insertions, 333 deletions
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index be8c68012..d6ef4f4f9 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -529,7 +529,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
entry->rule.buflen += f_val;
f->lsm_str = str;
err = security_audit_rule_init(f->type, f->op, str,
- (void **)&f->lsm_rule);
+ (void **)&f->lsm_rule,
+ GFP_KERNEL);
/* Keep currently invalid fields around in case they
* become valid after a policy reload. */
if (err == -EINVAL) {
@@ -799,7 +800,7 @@ static inline int audit_dupe_lsm_field(struct audit_field *df,
/* our own (refreshed) copy of lsm_rule */
ret = security_audit_rule_init(df->type, df->op, df->lsm_str,
- (void **)&df->lsm_rule);
+ (void **)&df->lsm_rule, GFP_KERNEL);
/* Keep currently invalid fields around in case they
* become valid after a policy reload. */
if (ret == -EINVAL) {
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 050fe1ebf..d0febf070 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -308,6 +308,7 @@ static long trie_update_elem(struct bpf_map *map,
{
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
+ struct lpm_trie_node *free_node = NULL;
struct lpm_trie_node __rcu **slot;
struct bpf_lpm_trie_key_u8 *key = _key;
unsigned long irq_flags;
@@ -382,7 +383,7 @@ static long trie_update_elem(struct bpf_map *map,
trie->n_entries--;
rcu_assign_pointer(*slot, new_node);
- kfree_rcu(node, rcu);
+ free_node = node;
goto out;
}
@@ -429,6 +430,7 @@ out:
}
spin_unlock_irqrestore(&trie->lock, irq_flags);
+ kfree_rcu(free_node, rcu);
return ret;
}
@@ -437,6 +439,7 @@ out:
static long trie_delete_elem(struct bpf_map *map, void *_key)
{
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+ struct lpm_trie_node *free_node = NULL, *free_parent = NULL;
struct bpf_lpm_trie_key_u8 *key = _key;
struct lpm_trie_node __rcu **trim, **trim2;
struct lpm_trie_node *node, *parent;
@@ -506,8 +509,8 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
else
rcu_assign_pointer(
*trim2, rcu_access_pointer(parent->child[0]));
- kfree_rcu(parent, rcu);
- kfree_rcu(node, rcu);
+ free_parent = parent;
+ free_node = node;
goto out;
}
@@ -521,10 +524,12 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1]));
else
RCU_INIT_POINTER(*trim, NULL);
- kfree_rcu(node, rcu);
+ free_node = node;
out:
spin_unlock_irqrestore(&trie->lock, irq_flags);
+ kfree_rcu(free_parent, rcu);
+ kfree_rcu(free_node, rcu);
return ret;
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c28792547..52ffe3335 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2985,6 +2985,7 @@ static int bpf_obj_get(const union bpf_attr *attr)
void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
const struct bpf_link_ops *ops, struct bpf_prog *prog)
{
+ WARN_ON(ops->dealloc && ops->dealloc_deferred);
atomic64_set(&link->refcnt, 1);
link->type = type;
link->id = 0;
@@ -3043,16 +3044,17 @@ static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
+ const struct bpf_link_ops *ops = link->ops;
bool sleepable = false;
bpf_link_free_id(link->id);
if (link->prog) {
sleepable = link->prog->sleepable;
/* detach BPF program, clean up used resources */
- link->ops->release(link);
+ ops->release(link);
bpf_prog_put(link->prog);
}
- if (link->ops->dealloc_deferred) {
+ if (ops->dealloc_deferred) {
/* schedule BPF link deallocation; if underlying BPF program
* is sleepable, we need to first wait for RCU tasks trace
* sync, then go through "classic" RCU grace period
@@ -3061,9 +3063,8 @@ static void bpf_link_free(struct bpf_link *link)
call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
else
call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
- }
- if (link->ops->dealloc)
- link->ops->dealloc(link);
+ } else if (ops->dealloc)
+ ops->dealloc(link);
}
static void bpf_link_put_deferred(struct work_struct *work)
@@ -3985,6 +3986,11 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
* check permissions at attach time.
*/
return -EPERM;
+
+ ptype = attach_type_to_prog_type(attach_type);
+ if (prog->type != ptype)
+ return -EINVAL;
+
return prog->enforce_expected_attach_type &&
prog->expected_attach_type != attach_type ?
-EINVAL : 0;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index cb7ad1f79..0ef18ae40 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2359,6 +2359,8 @@ static void mark_btf_ld_reg(struct bpf_verifier_env *env,
regs[regno].type = PTR_TO_BTF_ID | flag;
regs[regno].btf = btf;
regs[regno].btf_id = btf_id;
+ if (type_may_be_null(flag))
+ regs[regno].id = ++env->id_gen;
}
#define DEF_NOT_SUBREG (0)
@@ -3615,7 +3617,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* sreg needs precision before this insn
*/
bt_clear_reg(bt, dreg);
- bt_set_reg(bt, sreg);
+ if (sreg != BPF_REG_FP)
+ bt_set_reg(bt, sreg);
} else {
/* dreg = K
* dreg needs precision after this insn.
@@ -3631,7 +3634,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* both dreg and sreg need precision
* before this insn
*/
- bt_set_reg(bt, sreg);
+ if (sreg != BPF_REG_FP)
+ bt_set_reg(bt, sreg);
} /* else dreg += K
* dreg still needs precision before this insn
*/
@@ -5386,8 +5390,6 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
*/
mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field));
- /* For mark_ptr_or_null_reg */
- val_reg->id = ++env->id_gen;
} else if (class == BPF_STX) {
val_reg = reg_state(env, value_regno);
if (!register_is_null(val_reg) &&
@@ -5705,7 +5707,8 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg)
return true;
/* Types listed in the reg2btf_ids are always trusted */
- if (reg2btf_ids[base_type(reg->type)])
+ if (reg2btf_ids[base_type(reg->type)] &&
+ !bpf_type_has_unsafe_modifiers(reg->type))
return true;
/* If a register is not referenced, it is trusted if it has the
@@ -6325,6 +6328,7 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
#define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu)
#define BTF_TYPE_SAFE_RCU_OR_NULL(__type) __PASTE(__type, __safe_rcu_or_null)
#define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted)
+#define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type) __PASTE(__type, __safe_trusted_or_null)
/*
* Allow list few fields as RCU trusted or full trusted.
@@ -6388,7 +6392,7 @@ BTF_TYPE_SAFE_TRUSTED(struct dentry) {
struct inode *d_inode;
};
-BTF_TYPE_SAFE_TRUSTED(struct socket) {
+BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
struct sock *sk;
};
@@ -6423,11 +6427,20 @@ static bool type_is_trusted(struct bpf_verifier_env *env,
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
- BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
}
+static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ const char *field_name, u32 btf_id)
+{
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
+
+ return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
+ "__safe_trusted_or_null");
+}
+
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
struct bpf_reg_state *regs,
int regno, int off, int size,
@@ -6536,6 +6549,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
*/
if (type_is_trusted(env, reg, field_name, btf_id)) {
flag |= PTR_TRUSTED;
+ } else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
+ flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
if (type_is_rcu(env, reg, field_name, btf_id)) {
/* ignore __rcu tag and mark it MEM_RCU */
@@ -8830,7 +8845,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
enum bpf_attach_type eatype = env->prog->expected_attach_type;
enum bpf_prog_type type = resolve_prog_type(env->prog);
- if (func_id != BPF_FUNC_map_update_elem)
+ if (func_id != BPF_FUNC_map_update_elem &&
+ func_id != BPF_FUNC_map_delete_elem)
return false;
/* It's not possible to get access to a locked struct sock in these
@@ -8841,6 +8857,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
if (eatype == BPF_TRACE_ITER)
return true;
break;
+ case BPF_PROG_TYPE_SOCK_OPS:
+ /* map_update allowed only via dedicated helpers with event type checks */
+ if (func_id == BPF_FUNC_map_delete_elem)
+ return true;
+ break;
case BPF_PROG_TYPE_SOCKET_FILTER:
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
@@ -8936,7 +8957,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_SOCKMAP:
if (func_id != BPF_FUNC_sk_redirect_map &&
func_id != BPF_FUNC_sock_map_update &&
- func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_map &&
func_id != BPF_FUNC_sk_select_reuseport &&
func_id != BPF_FUNC_map_lookup_elem &&
@@ -8946,7 +8966,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_SOCKHASH:
if (func_id != BPF_FUNC_sk_redirect_hash &&
func_id != BPF_FUNC_sock_hash_update &&
- func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_hash &&
func_id != BPF_FUNC_sk_select_reuseport &&
func_id != BPF_FUNC_map_lookup_elem &&
@@ -14954,7 +14973,6 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
struct bpf_reg_state *eq_branch_regs;
- struct bpf_reg_state fake_reg = {};
u8 opcode = BPF_OP(insn->code);
bool is_jmp32;
int pred = -1;
@@ -15020,7 +15038,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
return -EINVAL;
}
- src_reg = &fake_reg;
+ src_reg = &env->fake_reg[0];
+ memset(src_reg, 0, sizeof(*src_reg));
src_reg->type = SCALAR_VALUE;
__mark_reg_known(src_reg, insn->imm);
}
@@ -15080,10 +15099,16 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
&other_branch_regs[insn->src_reg],
dst_reg, src_reg, opcode, is_jmp32);
} else /* BPF_SRC(insn->code) == BPF_K */ {
+ /* reg_set_min_max() can mangle the fake_reg. Make a copy
+ * so that these are two different memory locations. The
+ * src_reg is not used beyond here in context of K.
+ */
+ memcpy(&env->fake_reg[1], &env->fake_reg[0],
+ sizeof(env->fake_reg[0]));
err = reg_set_min_max(env,
&other_branch_regs[insn->dst_reg],
- src_reg /* fake one */,
- dst_reg, src_reg /* same fake one */,
+ &env->fake_reg[0],
+ dst_reg, &env->fake_reg[1],
opcode, is_jmp32);
}
if (err)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 4237c8748..73ef0dabc 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -202,6 +202,14 @@ struct cpuset {
};
/*
+ * Legacy hierarchy call to cgroup_transfer_tasks() is handled asynchrously
+ */
+struct cpuset_remove_tasks_struct {
+ struct work_struct work;
+ struct cpuset *cs;
+};
+
+/*
* Exclusive CPUs distributed out to sub-partitions of top_cpuset
*/
static cpumask_var_t subpartitions_cpus;
@@ -449,12 +457,6 @@ static DEFINE_SPINLOCK(callback_lock);
static struct workqueue_struct *cpuset_migrate_mm_wq;
-/*
- * CPU / memory hotplug is handled asynchronously.
- */
-static void cpuset_hotplug_workfn(struct work_struct *work);
-static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
-
static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);
static inline void check_insane_mems_config(nodemask_t *nodes)
@@ -540,22 +542,10 @@ static void guarantee_online_cpus(struct task_struct *tsk,
rcu_read_lock();
cs = task_cs(tsk);
- while (!cpumask_intersects(cs->effective_cpus, pmask)) {
+ while (!cpumask_intersects(cs->effective_cpus, pmask))
cs = parent_cs(cs);
- if (unlikely(!cs)) {
- /*
- * The top cpuset doesn't have any online cpu as a
- * consequence of a race between cpuset_hotplug_work
- * and cpu hotplug notifier. But we know the top
- * cpuset's effective_cpus is on its way to be
- * identical to cpu_online_mask.
- */
- goto out_unlock;
- }
- }
- cpumask_and(pmask, pmask, cs->effective_cpus);
-out_unlock:
+ cpumask_and(pmask, pmask, cs->effective_cpus);
rcu_read_unlock();
}
@@ -1217,7 +1207,7 @@ static void rebuild_sched_domains_locked(void)
/*
* If we have raced with CPU hotplug, return early to avoid
* passing doms with offlined cpu to partition_sched_domains().
- * Anyways, cpuset_hotplug_workfn() will rebuild sched domains.
+ * Anyways, cpuset_handle_hotplug() will rebuild sched domains.
*
* With no CPUs in any subpartitions, top_cpuset's effective CPUs
* should be the same as the active CPUs, so checking only top_cpuset
@@ -1260,12 +1250,17 @@ static void rebuild_sched_domains_locked(void)
}
#endif /* CONFIG_SMP */
-void rebuild_sched_domains(void)
+static void rebuild_sched_domains_cpuslocked(void)
{
- cpus_read_lock();
mutex_lock(&cpuset_mutex);
rebuild_sched_domains_locked();
mutex_unlock(&cpuset_mutex);
+}
+
+void rebuild_sched_domains(void)
+{
+ cpus_read_lock();
+ rebuild_sched_domains_cpuslocked();
cpus_read_unlock();
}
@@ -2079,14 +2074,11 @@ write_error:
/*
* For partcmd_update without newmask, it is being called from
- * cpuset_hotplug_workfn() where cpus_read_lock() wasn't taken.
- * Update the load balance flag and scheduling domain if
- * cpus_read_trylock() is successful.
+ * cpuset_handle_hotplug(). Update the load balance flag and
+ * scheduling domain accordingly.
*/
- if ((cmd == partcmd_update) && !newmask && cpus_read_trylock()) {
+ if ((cmd == partcmd_update) && !newmask)
update_partition_sd_lb(cs, old_prs);
- cpus_read_unlock();
- }
notify_partition_change(cs, old_prs);
return 0;
@@ -2948,7 +2940,7 @@ bool current_cpuset_is_being_rebound(void)
static int update_relax_domain_level(struct cpuset *cs, s64 val)
{
#ifdef CONFIG_SMP
- if (val < -1 || val >= sched_domain_level_max)
+ if (val < -1 || val > sched_domain_level_max + 1)
return -EINVAL;
#endif
@@ -3599,8 +3591,8 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
* proceeding, so that we don't end up keep removing tasks added
* after execution capability is restored.
*
- * cpuset_hotplug_work calls back into cgroup core via
- * cgroup_transfer_tasks() and waiting for it from a cgroupfs
+ * cpuset_handle_hotplug may call back into cgroup core asynchronously
+ * via cgroup_transfer_tasks() and waiting for it from a cgroupfs
* operation like this one can lead to a deadlock through kernfs
* active_ref protection. Let's break the protection. Losing the
* protection is okay as we check whether @cs is online after
@@ -3609,7 +3601,6 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
*/
css_get(&cs->css);
kernfs_break_active_protection(of->kn);
- flush_work(&cpuset_hotplug_work);
cpus_read_lock();
mutex_lock(&cpuset_mutex);
@@ -4354,6 +4345,16 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
}
}
+static void cpuset_migrate_tasks_workfn(struct work_struct *work)
+{
+ struct cpuset_remove_tasks_struct *s;
+
+ s = container_of(work, struct cpuset_remove_tasks_struct, work);
+ remove_tasks_in_empty_cpuset(s->cs);
+ css_put(&s->cs->css);
+ kfree(s);
+}
+
static void
hotplug_update_tasks_legacy(struct cpuset *cs,
struct cpumask *new_cpus, nodemask_t *new_mems,
@@ -4383,12 +4384,21 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
/*
* Move tasks to the nearest ancestor with execution resources,
* This is full cgroup operation which will also call back into
- * cpuset. Should be done outside any lock.
+ * cpuset. Execute it asynchronously using workqueue.
*/
- if (is_empty) {
- mutex_unlock(&cpuset_mutex);
- remove_tasks_in_empty_cpuset(cs);
- mutex_lock(&cpuset_mutex);
+ if (is_empty && cs->css.cgroup->nr_populated_csets &&
+ css_tryget_online(&cs->css)) {
+ struct cpuset_remove_tasks_struct *s;
+
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
+ if (WARN_ON_ONCE(!s)) {
+ css_put(&cs->css);
+ return;
+ }
+
+ s->cs = cs;
+ INIT_WORK(&s->work, cpuset_migrate_tasks_workfn);
+ schedule_work(&s->work);
}
}
@@ -4421,30 +4431,6 @@ void cpuset_force_rebuild(void)
force_rebuild = true;
}
-/*
- * Attempt to acquire a cpus_read_lock while a hotplug operation may be in
- * progress.
- * Return: true if successful, false otherwise
- *
- * To avoid circular lock dependency between cpuset_mutex and cpus_read_lock,
- * cpus_read_trylock() is used here to acquire the lock.
- */
-static bool cpuset_hotplug_cpus_read_trylock(void)
-{
- int retries = 0;
-
- while (!cpus_read_trylock()) {
- /*
- * CPU hotplug still in progress. Retry 5 times
- * with a 10ms wait before bailing out.
- */
- if (++retries > 5)
- return false;
- msleep(10);
- }
- return true;
-}
-
/**
* cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug
* @cs: cpuset in interest
@@ -4493,13 +4479,11 @@ retry:
compute_partition_effective_cpumask(cs, &new_cpus);
if (remote && cpumask_empty(&new_cpus) &&
- partition_is_populated(cs, NULL) &&
- cpuset_hotplug_cpus_read_trylock()) {
+ partition_is_populated(cs, NULL)) {
remote_partition_disable(cs, tmp);
compute_effective_cpumask(&new_cpus, cs, parent);
remote = false;
cpuset_force_rebuild();
- cpus_read_unlock();
}
/*
@@ -4519,18 +4503,8 @@ retry:
else if (is_partition_valid(parent) && is_partition_invalid(cs))
partcmd = partcmd_update;
- /*
- * cpus_read_lock needs to be held before calling
- * update_parent_effective_cpumask(). To avoid circular lock
- * dependency between cpuset_mutex and cpus_read_lock,
- * cpus_read_trylock() is used here to acquire the lock.
- */
if (partcmd >= 0) {
- if (!cpuset_hotplug_cpus_read_trylock())
- goto update_tasks;
-
update_parent_effective_cpumask(cs, partcmd, NULL, tmp);
- cpus_read_unlock();
if ((partcmd == partcmd_invalidate) || is_partition_valid(cs)) {
compute_partition_effective_cpumask(cs, &new_cpus);
cpuset_force_rebuild();
@@ -4558,8 +4532,7 @@ unlock:
}
/**
- * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
- * @work: unused
+ * cpuset_handle_hotplug - handle CPU/memory hot{,un}plug for a cpuset
*
* This function is called after either CPU or memory configuration has
* changed and updates cpuset accordingly. The top_cpuset is always
@@ -4573,8 +4546,10 @@ unlock:
*
* Note that CPU offlining during suspend is ignored. We don't modify
* cpusets across suspend/resume cycles at all.
+ *
+ * CPU / memory hotplug is handled synchronously.
*/
-static void cpuset_hotplug_workfn(struct work_struct *work)
+static void cpuset_handle_hotplug(void)
{
static cpumask_t new_cpus;
static nodemask_t new_mems;
@@ -4585,6 +4560,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
if (on_dfl && !alloc_cpumasks(NULL, &tmp))
ptmp = &tmp;
+ lockdep_assert_cpus_held();
mutex_lock(&cpuset_mutex);
/* fetch the available cpus/mems and find out which changed how */
@@ -4666,7 +4642,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
/* rebuild sched domains if cpus_allowed has changed */
if (cpus_updated || force_rebuild) {
force_rebuild = false;
- rebuild_sched_domains();
+ rebuild_sched_domains_cpuslocked();
}
free_cpumasks(NULL, ptmp);
@@ -4679,12 +4655,7 @@ void cpuset_update_active_cpus(void)
* inside cgroup synchronization. Bounce actual hotplug processing
* to a work item to avoid reverse locking order.
*/
- schedule_work(&cpuset_hotplug_work);
-}
-
-void cpuset_wait_for_hotplug(void)
-{
- flush_work(&cpuset_hotplug_work);
+ cpuset_handle_hotplug();
}
/*
@@ -4695,7 +4666,7 @@ void cpuset_wait_for_hotplug(void)
static int cpuset_track_online_nodes(struct notifier_block *self,
unsigned long action, void *arg)
{
- schedule_work(&cpuset_hotplug_work);
+ cpuset_handle_hotplug();
return NOTIFY_OK;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 63447eb85..563877d6c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1208,52 +1208,6 @@ void __init cpuhp_threads_init(void)
kthread_unpark(this_cpu_read(cpuhp_state.thread));
}
-/*
- *
- * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
- * protected region.
- *
- * The operation is still serialized against concurrent CPU hotplug via
- * cpu_add_remove_lock, i.e. CPU map protection. But it is _not_
- * serialized against other hotplug related activity like adding or
- * removing of state callbacks and state instances, which invoke either the
- * startup or the teardown callback of the affected state.
- *
- * This is required for subsystems which are unfixable vs. CPU hotplug and
- * evade lock inversion problems by scheduling work which has to be
- * completed _before_ cpu_up()/_cpu_down() returns.
- *
- * Don't even think about adding anything to this for any new code or even
- * drivers. It's only purpose is to keep existing lock order trainwrecks
- * working.
- *
- * For cpu_down() there might be valid reasons to finish cleanups which are
- * not required to be done under cpu_hotplug_lock, but that's a different
- * story and would be not invoked via this.
- */
-static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
-{
- /*
- * cpusets delegate hotplug operations to a worker to "solve" the
- * lock order problems. Wait for the worker, but only if tasks are
- * _not_ frozen (suspend, hibernate) as that would wait forever.
- *
- * The wait is required because otherwise the hotplug operation
- * returns with inconsistent state, which could even be observed in
- * user space when a new CPU is brought up. The CPU plug uevent
- * would be delivered and user space reacting on it would fail to
- * move tasks to the newly plugged CPU up to the point where the
- * work has finished because up to that point the newly plugged CPU
- * is not assignable in cpusets/cgroups. On unplug that's not
- * necessarily a visible issue, but it is still inconsistent state,
- * which is the real problem which needs to be "fixed". This can't
- * prevent the transient state between scheduling the work and
- * returning from waiting for it.
- */
- if (!tasks_frozen)
- cpuset_wait_for_hotplug();
-}
-
#ifdef CONFIG_HOTPLUG_CPU
#ifndef arch_clear_mm_cpumask_cpu
#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
@@ -1494,7 +1448,6 @@ out:
*/
lockup_detector_cleanup();
arch_smt_update();
- cpu_up_down_serialize_trainwrecks(tasks_frozen);
return ret;
}
@@ -1728,7 +1681,6 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
out:
cpus_write_unlock();
arch_smt_update();
- cpu_up_down_serialize_trainwrecks(tasks_frozen);
return ret;
}
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 9443bc63c..2aeaf9765 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -184,6 +184,33 @@ char kdb_getchar(void)
unreachable();
}
+/**
+ * kdb_position_cursor() - Place cursor in the correct horizontal position
+ * @prompt: Nil-terminated string containing the prompt string
+ * @buffer: Nil-terminated string containing the entire command line
+ * @cp: Cursor position, pointer the character in buffer where the cursor
+ * should be positioned.
+ *
+ * The cursor is positioned by sending a carriage-return and then printing
+ * the content of the line until we reach the correct cursor position.
+ *
+ * There is some additional fine detail here.
+ *
+ * Firstly, even though kdb_printf() will correctly format zero-width fields
+ * we want the second call to kdb_printf() to be conditional. That keeps things
+ * a little cleaner when LOGGING=1.
+ *
+ * Secondly, we can't combine everything into one call to kdb_printf() since
+ * that renders into a fixed length buffer and the combined print could result
+ * in unwanted truncation.
+ */
+static void kdb_position_cursor(char *prompt, char *buffer, char *cp)
+{
+ kdb_printf("\r%s", kdb_prompt_str);
+ if (cp > buffer)
+ kdb_printf("%.*s", (int)(cp - buffer), buffer);
+}
+
/*
* kdb_read
*
@@ -212,7 +239,6 @@ static char *kdb_read(char *buffer, size_t bufsize)
* and null byte */
char *lastchar;
char *p_tmp;
- char tmp;
static char tmpbuffer[CMD_BUFLEN];
int len = strlen(buffer);
int len_tmp;
@@ -249,12 +275,8 @@ poll_again:
}
*(--lastchar) = '\0';
--cp;
- kdb_printf("\b%s \r", cp);
- tmp = *cp;
- *cp = '\0';
- kdb_printf(kdb_prompt_str);
- kdb_printf("%s", buffer);
- *cp = tmp;
+ kdb_printf("\b%s ", cp);
+ kdb_position_cursor(kdb_prompt_str, buffer, cp);
}
break;
case 10: /* linefeed */
@@ -272,19 +294,14 @@ poll_again:
memcpy(tmpbuffer, cp+1, lastchar - cp - 1);
memcpy(cp, tmpbuffer, lastchar - cp - 1);
*(--lastchar) = '\0';
- kdb_printf("%s \r", cp);
- tmp = *cp;
- *cp = '\0';
- kdb_printf(kdb_prompt_str);
- kdb_printf("%s", buffer);
- *cp = tmp;
+ kdb_printf("%s ", cp);
+ kdb_position_cursor(kdb_prompt_str, buffer, cp);
}
break;
case 1: /* Home */
if (cp > buffer) {
- kdb_printf("\r");
- kdb_printf(kdb_prompt_str);
cp = buffer;
+ kdb_position_cursor(kdb_prompt_str, buffer, cp);
}
break;
case 5: /* End */
@@ -300,11 +317,10 @@ poll_again:
}
break;
case 14: /* Down */
- memset(tmpbuffer, ' ',
- strlen(kdb_prompt_str) + (lastchar-buffer));
- *(tmpbuffer+strlen(kdb_prompt_str) +
- (lastchar-buffer)) = '\0';
- kdb_printf("\r%s\r", tmpbuffer);
+ case 16: /* Up */
+ kdb_printf("\r%*c\r",
+ (int)(strlen(kdb_prompt_str) + (lastchar - buffer)),
+ ' ');
*lastchar = (char)key;
*(lastchar+1) = '\0';
return lastchar;
@@ -314,15 +330,6 @@ poll_again:
++cp;
}
break;
- case 16: /* Up */
- memset(tmpbuffer, ' ',
- strlen(kdb_prompt_str) + (lastchar-buffer));
- *(tmpbuffer+strlen(kdb_prompt_str) +
- (lastchar-buffer)) = '\0';
- kdb_printf("\r%s\r", tmpbuffer);
- *lastchar = (char)key;
- *(lastchar+1) = '\0';
- return lastchar;
case 9: /* Tab */
if (tab < 2)
++tab;
@@ -366,15 +373,25 @@ poll_again:
kdb_printf("\n");
kdb_printf(kdb_prompt_str);
kdb_printf("%s", buffer);
+ if (cp != lastchar)
+ kdb_position_cursor(kdb_prompt_str, buffer, cp);
} else if (tab != 2 && count > 0) {
- len_tmp = strlen(p_tmp);
- strncpy(p_tmp+len_tmp, cp, lastchar-cp+1);
- len_tmp = strlen(p_tmp);
- strncpy(cp, p_tmp+len, len_tmp-len + 1);
- len = len_tmp - len;
- kdb_printf("%s", cp);
- cp += len;
- lastchar += len;
+ /* How many new characters do we want from tmpbuffer? */
+ len_tmp = strlen(p_tmp) - len;
+ if (lastchar + len_tmp >= bufend)
+ len_tmp = bufend - lastchar;
+
+ if (len_tmp) {
+ /* + 1 ensures the '\0' is memmove'd */
+ memmove(cp+len_tmp, cp, (lastchar-cp) + 1);
+ memcpy(cp, p_tmp+len, len_tmp);
+ kdb_printf("%s", cp);
+ cp += len_tmp;
+ lastchar += len_tmp;
+ if (cp != lastchar)
+ kdb_position_cursor(kdb_prompt_str,
+ buffer, cp);
+ }
}
kdb_nextline = 1; /* reset output line number */
break;
@@ -385,13 +402,9 @@ poll_again:
memcpy(cp+1, tmpbuffer, lastchar - cp);
*++lastchar = '\0';
*cp = key;
- kdb_printf("%s\r", cp);
+ kdb_printf("%s", cp);
++cp;
- tmp = *cp;
- *cp = '\0';
- kdb_printf(kdb_prompt_str);
- kdb_printf("%s", buffer);
- *cp = tmp;
+ kdb_position_cursor(kdb_prompt_str, buffer, cp);
} else {
*++lastchar = '\0';
*cp++ = key;
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 02205ab53..f7f3d14fa 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -101,7 +101,6 @@ static int do_map_benchmark(struct map_benchmark_data *map)
struct task_struct **tsk;
int threads = map->bparam.threads;
int node = map->bparam.node;
- const cpumask_t *cpu_mask = cpumask_of_node(node);
u64 loops;
int ret = 0;
int i;
@@ -118,11 +117,13 @@ static int do_map_benchmark(struct map_benchmark_data *map)
if (IS_ERR(tsk[i])) {
pr_err("create dma_map thread failed\n");
ret = PTR_ERR(tsk[i]);
+ while (--i >= 0)
+ kthread_stop(tsk[i]);
goto out;
}
if (node != NUMA_NO_NODE)
- kthread_bind_mask(tsk[i], cpu_mask);
+ kthread_bind_mask(tsk[i], cpumask_of_node(node));
}
/* clear the old value in the previous benchmark */
@@ -139,13 +140,17 @@ static int do_map_benchmark(struct map_benchmark_data *map)
msleep_interruptible(map->bparam.seconds * 1000);
- /* wait for the completion of benchmark threads */
+ /* wait for the completion of all started benchmark threads */
for (i = 0; i < threads; i++) {
- ret = kthread_stop(tsk[i]);
- if (ret)
- goto out;
+ int kthread_ret = kthread_stop_put(tsk[i]);
+
+ if (kthread_ret)
+ ret = kthread_ret;
}
+ if (ret)
+ goto out;
+
loops = atomic64_read(&map->loops);
if (likely(loops > 0)) {
u64 map_variance, unmap_variance;
@@ -170,8 +175,6 @@ static int do_map_benchmark(struct map_benchmark_data *map)
}
out:
- for (i = 0; i < threads; i++)
- put_task_struct(tsk[i]);
put_device(map->dev);
kfree(tsk);
return ret;
@@ -208,7 +211,8 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
}
if (map->bparam.node != NUMA_NO_NODE &&
- !node_possible(map->bparam.node)) {
+ (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES ||
+ !node_possible(map->bparam.node))) {
pr_err("invalid numa node\n");
return -EINVAL;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 724e6d7e1..4082d0161 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5365,6 +5365,7 @@ int perf_event_release_kernel(struct perf_event *event)
again:
mutex_lock(&event->child_mutex);
list_for_each_entry(child, &event->child_list, child_list) {
+ void *var = NULL;
/*
* Cannot change, child events are not migrated, see the
@@ -5405,11 +5406,23 @@ again:
* this can't be the last reference.
*/
put_event(event);
+ } else {
+ var = &ctx->refcount;
}
mutex_unlock(&event->child_mutex);
mutex_unlock(&ctx->mutex);
put_ctx(ctx);
+
+ if (var) {
+ /*
+ * If perf_event_free_task() has deleted all events from the
+ * ctx while the child_mutex got released above, make sure to
+ * notify about the preceding put_ctx().
+ */
+ smp_mb(); /* pairs with wait_var_event() */
+ wake_up_var(var);
+ }
goto again;
}
mutex_unlock(&event->child_mutex);
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index 74a4ef1da..fd75b4a48 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
#include <linux/mm.h>
#include "gcov.h"
-#if (__GNUC__ >= 10)
+#if (__GNUC__ >= 14)
+#define GCOV_COUNTERS 9
+#elif (__GNUC__ >= 10)
#define GCOV_COUNTERS 8
#elif (__GNUC__ >= 7)
#define GCOV_COUNTERS 9
diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 6d443ea22..383fd43ac 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -14,7 +14,12 @@ include/
arch/$SRCARCH/include/
"
-type cpio > /dev/null
+if ! command -v cpio >/dev/null; then
+ echo >&2 "***"
+ echo >&2 "*** 'cpio' could not be found."
+ echo >&2 "***"
+ exit 1
+fi
# Support incremental builds by skipping archive generation
# if timestamps of files being archived are not changed.
@@ -84,7 +89,7 @@ find $cpio_dir -type f -print0 |
# Create archive and try to normalize metadata for reproducibility.
tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
- --owner=0 --group=0 --sort=name --numeric-owner \
+ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \
-I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null
echo $headers_md5 > kernel/kheaders.md5
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 1ed2b1739..5ecd072a3 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_desc *desc)
}
/*
+ * Complete an eventually pending irq move cleanup. If this
+ * interrupt was moved in hard irq context, then the vectors need
+ * to be cleaned up. It can't wait until this interrupt actually
+ * happens and this CPU was involved.
+ */
+ irq_force_complete_move(desc);
+
+ /*
* No move required, if:
* - Interrupt is per cpu
* - Interrupt is not started
@@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_desc *desc)
}
/*
- * Complete an eventually pending irq move cleanup. If this
- * interrupt was moved in hard irq context, then the vectors need
- * to be cleaned up. It can't wait until this interrupt actually
- * happens and this CPU was involved.
- */
- irq_force_complete_move(desc);
-
- /*
* If there is a setaffinity pending, then try to reuse the pending
* mask, so the last change of the affinity does not get lost. If
* there is no move pending or the pending mask does not contain
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 4c6b32318..7bf9f66ca 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -160,7 +160,10 @@ static int irq_find_free_area(unsigned int from, unsigned int cnt)
static unsigned int irq_find_at_or_after(unsigned int offset)
{
unsigned long index = offset;
- struct irq_desc *desc = mt_find(&sparse_irqs, &index, nr_irqs);
+ struct irq_desc *desc;
+
+ guard(rcu)();
+ desc = mt_find(&sparse_irqs, &index, nr_irqs);
return desc ? irq_desc_get_irq(desc) : nr_irqs;
}
diff --git a/kernel/kcov.c b/kernel/kcov.c
index f9ac2e9e4..9f4affae4 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -631,6 +631,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
return -EINVAL;
kcov->mode = mode;
t->kcov = kcov;
+ t->kcov_mode = KCOV_MODE_REMOTE;
kcov->t = t;
kcov->remote = true;
kcov->remote_size = remote_arg->area_size;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 65adc815f..4f917bdad 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1068,6 +1068,7 @@ static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
static int kprobe_ipmodify_enabled;
static int kprobe_ftrace_enabled;
+bool kprobe_ftrace_disabled;
static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
int *cnt)
@@ -1136,6 +1137,11 @@ static int disarm_kprobe_ftrace(struct kprobe *p)
ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
}
+
+void kprobe_ftrace_kill(void)
+{
+ kprobe_ftrace_disabled = true;
+}
#else /* !CONFIG_KPROBES_ON_FTRACE */
static inline int arm_kprobe_ftrace(struct kprobe *p)
{
diff --git a/kernel/padata.c b/kernel/padata.c
index e3f639ff1..53f4bc912 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -106,7 +106,7 @@ static int __init padata_work_alloc_mt(int nworks, void *data,
{
int i;
- spin_lock(&padata_works_lock);
+ spin_lock_bh(&padata_works_lock);
/* Start at 1 because the current task participates in the job. */
for (i = 1; i < nworks; ++i) {
struct padata_work *pw = padata_work_alloc();
@@ -116,7 +116,7 @@ static int __init padata_work_alloc_mt(int nworks, void *data,
padata_work_init(pw, padata_mt_helper, data, 0);
list_add(&pw->pw_list, head);
}
- spin_unlock(&padata_works_lock);
+ spin_unlock_bh(&padata_works_lock);
return i;
}
@@ -134,12 +134,12 @@ static void __init padata_works_free(struct list_head *works)
if (list_empty(works))
return;
- spin_lock(&padata_works_lock);
+ spin_lock_bh(&padata_works_lock);
list_for_each_entry_safe(cur, next, works, pw_list) {
list_del(&cur->pw_list);
padata_work_free(cur);
}
- spin_unlock(&padata_works_lock);
+ spin_unlock_bh(&padata_works_lock);
}
static void padata_parallel_worker(struct work_struct *parallel_work)
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 7ade20e95..415201ca0 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -218,6 +218,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
*/
do {
clear_thread_flag(TIF_SIGPENDING);
+ clear_thread_flag(TIF_NOTIFY_SIGNAL);
rc = kernel_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index cae81a87c..66ac067d9 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -194,8 +194,6 @@ void thaw_processes(void)
__usermodehelper_set_disable_depth(UMH_FREEZING);
thaw_workqueues();
- cpuset_wait_for_hotplug();
-
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
/* No other threads should have PF_SUSPEND_TASK set */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 45d6b4c3d..cf2e90753 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1997,7 +1997,8 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
preempt_disable();
pipe_count = READ_ONCE(p->rtort_pipe_count);
if (pipe_count > RCU_TORTURE_PIPE_LEN) {
- /* Should not happen, but... */
+ // Should not happen in a correct RCU implementation,
+ // happens quite often for torture_type=busted.
pipe_count = RCU_TORTURE_PIPE_LEN;
}
completed = cur_ops->get_gp_seq();
@@ -2486,8 +2487,8 @@ static int rcu_torture_stall(void *args)
preempt_disable();
pr_alert("%s start on CPU %d.\n",
__func__, raw_smp_processor_id());
- while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(),
- stop_at))
+ while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(), stop_at) &&
+ !kthread_should_stop())
if (stall_cpu_block) {
#ifdef CONFIG_PREEMPTION
preempt_schedule();
@@ -3040,11 +3041,12 @@ static void rcu_torture_barrier_cbf(struct rcu_head *rcu)
}
/* IPI handler to get callback posted on desired CPU, if online. */
-static void rcu_torture_barrier1cb(void *rcu_void)
+static int rcu_torture_barrier1cb(void *rcu_void)
{
struct rcu_head *rhp = rcu_void;
cur_ops->call(rhp, rcu_torture_barrier_cbf);
+ return 0;
}
/* kthread function to register callbacks used to test RCU barriers. */
@@ -3070,11 +3072,9 @@ static int rcu_torture_barrier_cbs(void *arg)
* The above smp_load_acquire() ensures barrier_phase load
* is ordered before the following ->call().
*/
- if (smp_call_function_single(myid, rcu_torture_barrier1cb,
- &rcu, 1)) {
- // IPI failed, so use direct call from current CPU.
+ if (smp_call_on_cpu(myid, rcu_torture_barrier1cb, &rcu, 1))
cur_ops->call(&rcu, rcu_torture_barrier_cbf);
- }
+
if (atomic_dec_and_test(&barrier_cbs_count))
wake_up(&barrier_wq);
} while (!torture_must_stop());
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 147b5945d..2a453de9f 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1994,7 +1994,7 @@ void show_rcu_tasks_trace_gp_kthread(void)
{
char buf[64];
- sprintf(buf, "N%lu h:%lu/%lu/%lu",
+ snprintf(buf, sizeof(buf), "N%lu h:%lu/%lu/%lu",
data_race(n_trc_holdouts),
data_race(n_heavy_reader_ofl_updates),
data_race(n_heavy_reader_updates),
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 5d6664285..b5ec62b2d 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -504,7 +504,8 @@ static void print_cpu_stall_info(int cpu)
rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
rcuc_starved = rcu_is_rcuc_kthread_starving(rdp, &j);
if (rcuc_starved)
- sprintf(buf, " rcuc=%ld jiffies(starved)", j);
+ // Print signed value, as negative values indicate a probable bug.
+ snprintf(buf, sizeof(buf), " rcuc=%ld jiffies(starved)", j);
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%04x/%ld/%#lx softirq=%u/%u fqs=%ld%s%s\n",
cpu,
"O."[!!cpu_online(cpu)],
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7019a4045..d211d40a2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11402,7 +11402,7 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
{
struct task_group *tg = css_tg(of_css(of));
u64 period = tg_get_cfs_period(tg);
- u64 burst = tg_get_cfs_burst(tg);
+ u64 burst = tg->cfs_bandwidth.burst;
u64 quota;
int ret;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c62805dbd..213c94d02 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6675,22 +6675,42 @@ static inline void hrtick_update(struct rq *rq)
#ifdef CONFIG_SMP
static inline bool cpu_overutilized(int cpu)
{
- unsigned long rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
- unsigned long rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);
+ unsigned long rq_util_min, rq_util_max;
+
+ if (!sched_energy_enabled())
+ return false;
+
+ rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
+ rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);
/* Return true only if the utilization doesn't fit CPU's capacity */
return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min, rq_util_max, cpu);
}
-static inline void update_overutilized_status(struct rq *rq)
+static inline void set_rd_overutilized_status(struct root_domain *rd,
+ unsigned int status)
{
- if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) {
- WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED);
- trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED);
- }
+ if (!sched_energy_enabled())
+ return;
+
+ WRITE_ONCE(rd->overutilized, status);
+ trace_sched_overutilized_tp(rd, !!status);
+}
+
+static inline void check_update_overutilized_status(struct rq *rq)
+{
+ /*
+ * overutilized field is used for load balancing decisions only
+ * if energy aware scheduler is being used
+ */
+ if (!sched_energy_enabled())
+ return;
+
+ if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu))
+ set_rd_overutilized_status(rq->rd, SG_OVERUTILIZED);
}
#else
-static inline void update_overutilized_status(struct rq *rq) { }
+static inline void check_update_overutilized_status(struct rq *rq) { }
#endif
/* Runqueue only has SCHED_IDLE tasks enqueued */
@@ -6791,7 +6811,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
* and the following generally works well enough in practice.
*/
if (!task_new)
- update_overutilized_status(rq);
+ check_update_overutilized_status(rq);
enqueue_throttle:
assert_list_leaf_cfs_rq(rq);
@@ -10608,19 +10628,14 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
env->fbq_type = fbq_classify_group(&sds->busiest_stat);
if (!env->sd->parent) {
- struct root_domain *rd = env->dst_rq->rd;
-
/* update overload indicator if we are at root domain */
- WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD);
+ WRITE_ONCE(env->dst_rq->rd->overload, sg_status & SG_OVERLOAD);
/* Update over-utilization (tipping point, U >= 0) indicator */
- WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED);
- trace_sched_overutilized_tp(rd, sg_status & SG_OVERUTILIZED);
+ set_rd_overutilized_status(env->dst_rq->rd,
+ sg_status & SG_OVERUTILIZED);
} else if (sg_status & SG_OVERUTILIZED) {
- struct root_domain *rd = env->dst_rq->rd;
-
- WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
- trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
+ set_rd_overutilized_status(env->dst_rq->rd, SG_OVERUTILIZED);
}
update_idle_cpu_scan(env, sum_util);
@@ -12621,7 +12636,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
task_tick_numa(rq, curr);
update_misfit_status(curr, rq);
- update_overutilized_status(task_rq(curr));
+ check_update_overutilized_status(task_rq(curr));
task_tick_core(rq, curr);
}
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 99ea59860..3127c9b30 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1468,7 +1468,7 @@ static void set_domain_attribute(struct sched_domain *sd,
} else
request = attr->relax_domain_level;
- if (sd->level > request) {
+ if (sd->level >= request) {
/* Turn off idle balance on this domain: */
sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e5b260aa0..4d50d53ac 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -20,6 +20,16 @@
#include "tick-internal.h"
#include "timekeeping_internal.h"
+static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end)
+{
+ u64 delta = clocksource_delta(end, start, cs->mask);
+
+ if (likely(delta < cs->max_cycles))
+ return clocksource_cyc2ns(delta, cs->mult, cs->shift);
+
+ return mul_u64_u32_shr(delta, cs->mult, cs->shift);
+}
+
/**
* clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
* @mult: pointer to mult variable
@@ -222,8 +232,8 @@ enum wd_read_status {
static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
{
unsigned int nretries, max_retries;
- u64 wd_end, wd_end2, wd_delta;
int64_t wd_delay, wd_seq_delay;
+ u64 wd_end, wd_end2;
max_retries = clocksource_get_max_watchdog_retry();
for (nretries = 0; nretries <= max_retries; nretries++) {
@@ -234,9 +244,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow,
wd_end2 = watchdog->read(watchdog);
local_irq_enable();
- wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
- wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
- watchdog->shift);
+ wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end);
if (wd_delay <= WATCHDOG_MAX_SKEW) {
if (nretries > 1 || nretries >= max_retries) {
pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
@@ -254,8 +262,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow,
* report system busy, reinit the watchdog and skip the current
* watchdog test.
*/
- wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask);
- wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift);
+ wd_seq_delay = cycles_to_nsec_safe(watchdog, wd_end, wd_end2);
if (wd_seq_delay > WATCHDOG_MAX_SKEW/2)
goto skip_test;
}
@@ -366,8 +373,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
delta = (csnow_end - csnow_mid) & cs->mask;
if (delta < 0)
cpumask_set_cpu(cpu, &cpus_ahead);
- delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
- cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ cs_nsec = cycles_to_nsec_safe(cs, csnow_begin, csnow_end);
if (cs_nsec > cs_nsec_max)
cs_nsec_max = cs_nsec;
if (cs_nsec < cs_nsec_min)
@@ -398,8 +404,8 @@ static inline void clocksource_reset_watchdog(void)
static void clocksource_watchdog(struct timer_list *unused)
{
- u64 csnow, wdnow, cslast, wdlast, delta;
int64_t wd_nsec, cs_nsec, interval;
+ u64 csnow, wdnow, cslast, wdlast;
int next_cpu, reset_pending;
struct clocksource *cs;
enum wd_read_status read_ret;
@@ -456,12 +462,8 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}
- delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
- wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
- watchdog->shift);
-
- delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
- cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ wd_nsec = cycles_to_nsec_safe(watchdog, cs->wd_last, wdnow);
+ cs_nsec = cycles_to_nsec_safe(cs, cs->cs_last, csnow);
wdlast = cs->wd_last; /* save these in case we print them */
cslast = cs->cs_last;
cs->cs_last = csnow;
@@ -832,7 +834,7 @@ void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
*/
u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
{
- u64 now, delta, nsec = 0;
+ u64 now, nsec = 0;
if (!suspend_clocksource)
return 0;
@@ -847,12 +849,8 @@ u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
else
now = suspend_clocksource->read(suspend_clocksource);
- if (now > suspend_start) {
- delta = clocksource_delta(now, suspend_start,
- suspend_clocksource->mask);
- nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult,
- suspend_clocksource->shift);
- }
+ if (now > suspend_start)
+ nsec = cycles_to_nsec_safe(suspend_clocksource, suspend_start, now);
/*
* Disable the suspend timer to save power if current clocksource is
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index d88b13076..a47bcf71d 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -178,26 +178,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
}
}
-#ifdef CONFIG_NO_HZ_FULL
-static void giveup_do_timer(void *info)
-{
- int cpu = *(unsigned int *)info;
-
- WARN_ON(tick_do_timer_cpu != smp_processor_id());
-
- tick_do_timer_cpu = cpu;
-}
-
-static void tick_take_do_timer_from_boot(void)
-{
- int cpu = smp_processor_id();
- int from = tick_do_timer_boot_cpu;
-
- if (from >= 0 && from != cpu)
- smp_call_function_single(from, giveup_do_timer, &cpu, 1);
-}
-#endif
-
/*
* Setup the tick device
*/
@@ -221,19 +201,25 @@ static void tick_setup_device(struct tick_device *td,
tick_next_period = ktime_get();
#ifdef CONFIG_NO_HZ_FULL
/*
- * The boot CPU may be nohz_full, in which case set
- * tick_do_timer_boot_cpu so the first housekeeping
- * secondary that comes up will take do_timer from
- * us.
+ * The boot CPU may be nohz_full, in which case the
+ * first housekeeping secondary will take do_timer()
+ * from it.
*/
if (tick_nohz_full_cpu(cpu))
tick_do_timer_boot_cpu = cpu;
- } else if (tick_do_timer_boot_cpu != -1 &&
- !tick_nohz_full_cpu(cpu)) {
- tick_take_do_timer_from_boot();
+ } else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) {
tick_do_timer_boot_cpu = -1;
- WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
+ /*
+ * The boot CPU will stay in periodic (NOHZ disabled)
+ * mode until clocksource_done_booting() called after
+ * smp_init() selects a high resolution clocksource and
+ * timekeeping_notify() kicks the NOHZ stuff alive.
+ *
+ * So this WRITE_ONCE can only race with the READ_ONCE
+ * check in tick_periodic() but this race is harmless.
+ */
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
#endif
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 47345bf1d..34804c715 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1123,7 +1123,7 @@ config PREEMPTIRQ_DELAY_TEST
config SYNTH_EVENT_GEN_TEST
tristate "Test module for in-kernel synthetic event generation"
- depends on SYNTH_EVENTS
+ depends on SYNTH_EVENTS && m
help
This option creates a test module to check the base
functionality of in-kernel synthetic event definition and
@@ -1136,7 +1136,7 @@ config SYNTH_EVENT_GEN_TEST
config KPROBE_EVENT_GEN_TEST
tristate "Test module for in-kernel kprobe event generation"
- depends on KPROBE_EVENTS
+ depends on KPROBE_EVENTS && m
help
This option creates a test module to check the base
functionality of in-kernel kprobe event definition.
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 9dc605f08..5d8f918c9 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3260,7 +3260,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
struct bpf_run_ctx *old_run_ctx;
int err = 0;
- if (link->task && current != link->task)
+ if (link->task && current->mm != link->task->mm)
return 0;
if (sleepable)
@@ -3361,8 +3361,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
cnt = attr->link_create.uprobe_multi.cnt;
+ pid = attr->link_create.uprobe_multi.pid;
- if (!upath || !uoffsets || !cnt)
+ if (!upath || !uoffsets || !cnt || pid < 0)
return -EINVAL;
if (cnt > MAX_UPROBE_MULTI_CNT)
return -E2BIG;
@@ -3386,10 +3387,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
goto error_path_put;
}
- pid = attr->link_create.uprobe_multi.pid;
if (pid) {
rcu_read_lock();
- task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
+ task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
rcu_read_unlock();
if (!task) {
err = -ESRCH;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index da1710499..2e1123672 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1595,12 +1595,15 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
unsigned long ftrace_location_range(unsigned long start, unsigned long end)
{
struct dyn_ftrace *rec;
+ unsigned long ip = 0;
+ rcu_read_lock();
rec = lookup_rec(start, end);
if (rec)
- return rec->ip;
+ ip = rec->ip;
+ rcu_read_unlock();
- return 0;
+ return ip;
}
/**
@@ -1614,25 +1617,22 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end)
*/
unsigned long ftrace_location(unsigned long ip)
{
- struct dyn_ftrace *rec;
+ unsigned long loc;
unsigned long offset;
unsigned long size;
- rec = lookup_rec(ip, ip);
- if (!rec) {
+ loc = ftrace_location_range(ip, ip);
+ if (!loc) {
if (!kallsyms_lookup_size_offset(ip, &size, &offset))
goto out;
/* map sym+0 to __fentry__ */
if (!offset)
- rec = lookup_rec(ip, ip + size - 1);
+ loc = ftrace_location_range(ip, ip + size - 1);
}
- if (rec)
- return rec->ip;
-
out:
- return 0;
+ return loc;
}
/**
@@ -6596,6 +6596,8 @@ static int ftrace_process_locs(struct module *mod,
/* We should have used all pages unless we skipped some */
if (pg_unuse) {
WARN_ON(!skipped);
+ /* Need to synchronize with ftrace_location_range() */
+ synchronize_rcu();
ftrace_free_pages(pg_unuse);
}
return ret;
@@ -6809,6 +6811,9 @@ void ftrace_release_mod(struct module *mod)
out_unlock:
mutex_unlock(&ftrace_lock);
+ /* Need to synchronize with ftrace_location_range() */
+ if (tmp_page)
+ synchronize_rcu();
for (pg = tmp_page; pg; pg = tmp_page) {
/* Needs to be called outside of ftrace_lock */
@@ -7142,6 +7147,7 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr)
unsigned long start = (unsigned long)(start_ptr);
unsigned long end = (unsigned long)(end_ptr);
struct ftrace_page **last_pg = &ftrace_pages_start;
+ struct ftrace_page *tmp_page = NULL;
struct ftrace_page *pg;
struct dyn_ftrace *rec;
struct dyn_ftrace key;
@@ -7183,12 +7189,8 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr)
ftrace_update_tot_cnt--;
if (!pg->index) {
*last_pg = pg->next;
- if (pg->records) {
- free_pages((unsigned long)pg->records, pg->order);
- ftrace_number_of_pages -= 1 << pg->order;
- }
- ftrace_number_of_groups--;
- kfree(pg);
+ pg->next = tmp_page;
+ tmp_page = pg;
pg = container_of(last_pg, struct ftrace_page, next);
if (!(*last_pg))
ftrace_pages = pg;
@@ -7205,6 +7207,11 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr)
clear_func_from_hashes(func);
kfree(func);
}
+ /* Need to synchronize with ftrace_location_range() */
+ if (tmp_page) {
+ synchronize_rcu();
+ ftrace_free_pages(tmp_page);
+ }
}
void __init ftrace_free_init_mem(void)
@@ -7895,6 +7902,7 @@ void ftrace_kill(void)
ftrace_disabled = 1;
ftrace_enabled = 0;
ftrace_trace_function = ftrace_stub;
+ kprobe_ftrace_kill();
}
/**
diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index 8c4ffd076..cb0871fbd 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -215,4 +215,5 @@ static void __exit preemptirq_delay_exit(void)
module_init(preemptirq_delay_init)
module_exit(preemptirq_delay_exit)
+MODULE_DESCRIPTION("Preempt / IRQ disable delay thread to test latency tracers");
MODULE_LICENSE("GPL v2");
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6511dc3a0..54887f4c3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1449,6 +1449,11 @@ static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
*
* As a safety measure we check to make sure the data pages have not
* been corrupted.
+ *
+ * Callers of this function need to guarantee that the list of pages doesn't get
+ * modified during the check. In particular, if it's possible that the function
+ * is invoked with concurrent readers which can swap in a new reader page then
+ * the caller should take cpu_buffer->reader_lock.
*/
static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
@@ -2200,8 +2205,12 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
*/
synchronize_rcu();
for_each_buffer_cpu(buffer, cpu) {
+ unsigned long flags;
+
cpu_buffer = buffer->buffers[cpu];
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
rb_check_pages(cpu_buffer);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
atomic_dec(&buffer->record_disabled);
}
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index 2f68e93ff..df0745a42 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -245,6 +245,7 @@ static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync)
/**
* rv_disable_monitor - disable a given runtime monitor
+ * @mdef: Pointer to the monitor definition structure.
*
* Returns 0 on success.
*/
@@ -256,6 +257,7 @@ int rv_disable_monitor(struct rv_monitor_def *mdef)
/**
* rv_enable_monitor - enable a given runtime monitor
+ * @mdef: Pointer to the monitor definition structure.
*
* Returns 0 on success, error otherwise.
*/
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 70d428c39..82b191f33 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -1990,6 +1990,80 @@ static int user_event_set_tp_name(struct user_event *user)
}
/*
+ * Counts how many ';' without a trailing space are in the args.
+ */
+static int count_semis_no_space(char *args)
+{
+ int count = 0;
+
+ while ((args = strchr(args, ';'))) {
+ args++;
+
+ if (!isspace(*args))
+ count++;
+ }
+
+ return count;
+}
+
+/*
+ * Copies the arguments while ensuring all ';' have a trailing space.
+ */
+static char *insert_space_after_semis(char *args, int count)
+{
+ char *fixed, *pos;
+ int len;
+
+ len = strlen(args) + count;
+ fixed = kmalloc(len + 1, GFP_KERNEL);
+
+ if (!fixed)
+ return NULL;
+
+ pos = fixed;
+
+ /* Insert a space after ';' if there is no trailing space. */
+ while (*args) {
+ *pos = *args++;
+
+ if (*pos++ == ';' && !isspace(*args))
+ *pos++ = ' ';
+ }
+
+ *pos = '\0';
+
+ return fixed;
+}
+
+static char **user_event_argv_split(char *args, int *argc)
+{
+ char **split;
+ char *fixed;
+ int count;
+
+ /* Count how many ';' without a trailing space */
+ count = count_semis_no_space(args);
+
+ /* No fixup is required */
+ if (!count)
+ return argv_split(GFP_KERNEL, args, argc);
+
+ /* We must fixup 'field;field' to 'field; field' */
+ fixed = insert_space_after_semis(args, count);
+
+ if (!fixed)
+ return NULL;
+
+ /* We do a normal split afterwards */
+ split = argv_split(GFP_KERNEL, fixed, argc);
+
+ /* We can free since argv_split makes a copy */
+ kfree(fixed);
+
+ return split;
+}
+
+/*
* Parses the event name, arguments and flags then registers if successful.
* The name buffer lifetime is owned by this method for success cases only.
* Upon success the returned user_event has its ref count increased by 1.
@@ -2012,7 +2086,7 @@ static int user_event_parse(struct user_event_group *group, char *name,
return -EPERM;
if (args) {
- argv = argv_split(GFP_KERNEL, args, &argc);
+ argv = user_event_argv_split(args, &argc);
if (!argv)
return -ENOMEM;
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 42bc0f362..1a7e7cf94 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -553,6 +553,10 @@ static int parse_btf_field(char *fieldname, const struct btf_type *type,
anon_offs = 0;
field = btf_find_struct_member(ctx->btf, type, fieldname,
&anon_offs);
+ if (IS_ERR(field)) {
+ trace_probe_log_err(ctx->offset, BAD_BTF_TID);
+ return PTR_ERR(field);
+ }
if (!field) {
trace_probe_log_err(ctx->offset, NO_BTF_FIELD);
return -ENOENT;