summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-08 04:21:33 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-08 04:21:33 +0000
commit282c335ad1bf4d21fcedff132e19995c24c09adc (patch)
treed24dc7bfbb3a6b4bfd5b46964347ada86f72d751 /kernel
parentAdding upstream version 4.19.289. (diff)
downloadlinux-282c335ad1bf4d21fcedff132e19995c24c09adc.tar.xz
linux-282c335ad1bf4d21fcedff132e19995c24c09adc.zip
Adding upstream version 4.19.304.upstream/4.19.304upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/async.c4
-rw-r--r--kernel/audit.c2
-rw-r--r--kernel/audit_watch.c9
-rw-r--r--kernel/auditsc.c2
-rw-r--r--kernel/bpf/bpf_lru_list.c21
-rw-r--r--kernel/bpf/bpf_lru_list.h7
-rw-r--r--kernel/cgroup/cgroup-v1.c5
-rw-r--r--kernel/cgroup/namespace.c6
-rw-r--r--kernel/cpu.c8
-rw-r--r--kernel/cred.c66
-rw-r--r--kernel/dma/debug.c2
-rw-r--r--kernel/events/core.c138
-rw-r--r--kernel/events/ring_buffer.c11
-rw-r--r--kernel/events/uprobes.c2
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/futex.c12
-rw-r--r--kernel/irq/generic-chip.c25
-rw-r--r--kernel/irq/matrix.c6
-rw-r--r--kernel/kexec_core.c5
-rw-r--r--kernel/locking/lockdep.c6
-rw-r--r--kernel/locking/test-ww_mutex.c20
-rw-r--r--kernel/module.c15
-rw-r--r--kernel/padata.c2
-rw-r--r--kernel/power/snapshot.c16
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/sched/idle.c22
-rw-r--r--kernel/sched/rt.c5
-rw-r--r--kernel/time/hrtimer.c33
-rw-r--r--kernel/time/posix-timers.c31
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/trace/ftrace.c75
-rw-r--r--kernel/trace/ring_buffer.c52
-rw-r--r--kernel/trace/trace.c68
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_events.c15
-rw-r--r--kernel/trace/trace_events_hist.c9
-rw-r--r--kernel/trace/trace_irqsoff.c3
-rw-r--r--kernel/trace/trace_sched_wakeup.c2
-rw-r--r--kernel/trace/trace_uprobe.c3
-rw-r--r--kernel/watchdog_hld.c6
-rw-r--r--kernel/workqueue.c21
41 files changed, 461 insertions, 282 deletions
diff --git a/kernel/async.c b/kernel/async.c
index 4bf1b00a2..e59bd2240 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -115,7 +115,7 @@ static void async_run_entry_fn(struct work_struct *work)
struct async_entry *entry =
container_of(work, struct async_entry, work);
unsigned long flags;
- ktime_t uninitialized_var(calltime), delta, rettime;
+ ktime_t calltime, delta, rettime;
/* 1) run (and print duration) */
if (initcall_debug && system_state < SYSTEM_RUNNING) {
@@ -283,7 +283,7 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
*/
void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain)
{
- ktime_t uninitialized_var(starttime), delta, endtime;
+ ktime_t starttime, delta, endtime;
if (initcall_debug && system_state < SYSTEM_RUNNING) {
pr_debug("async_waiting @ %i\n", task_pid_nr(current));
diff --git a/kernel/audit.c b/kernel/audit.c
index 7dc14a4d9..471d3ad91 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1796,7 +1796,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
{
struct audit_buffer *ab;
struct timespec64 t;
- unsigned int uninitialized_var(serial);
+ unsigned int serial;
if (audit_initialized != AUDIT_INITIALIZED)
return NULL;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 50952d6d8..ff33536ae 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -557,11 +557,18 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
unsigned long ino;
dev_t dev;
- exe_file = get_task_exe_file(tsk);
+ /* only do exe filtering if we are recording @current events/records */
+ if (tsk != current)
+ return 0;
+
+ if (!current->mm)
+ return 0;
+ exe_file = get_mm_exe_file(current->mm);
if (!exe_file)
return 0;
ino = file_inode(exe_file)->i_ino;
dev = file_inode(exe_file)->i_sb->s_dev;
fput(exe_file);
+
return audit_mark_compare(mark, ino, dev);
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1513873e2..e4de5b9d5 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1923,6 +1923,8 @@ void __audit_inode_child(struct inode *parent,
}
}
+ cond_resched();
+
/* is there a matching child entry? */
list_for_each_entry(n, &context->names_list, list) {
/* can only match entries that have a name */
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index 9b5eeff72..39a0e768a 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -44,7 +44,12 @@ static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l)
/* bpf_lru_node helpers */
static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
{
- return node->ref;
+ return READ_ONCE(node->ref);
+}
+
+static void bpf_lru_node_clear_ref(struct bpf_lru_node *node)
+{
+ WRITE_ONCE(node->ref, 0);
}
static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
@@ -92,7 +97,7 @@ static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
bpf_lru_list_count_inc(l, tgt_type);
node->type = tgt_type;
- node->ref = 0;
+ bpf_lru_node_clear_ref(node);
list_move(&node->list, &l->lists[tgt_type]);
}
@@ -113,7 +118,7 @@ static void __bpf_lru_node_move(struct bpf_lru_list *l,
bpf_lru_list_count_inc(l, tgt_type);
node->type = tgt_type;
}
- node->ref = 0;
+ bpf_lru_node_clear_ref(node);
/* If the moving node is the next_inactive_rotation candidate,
* move the next_inactive_rotation pointer also.
@@ -356,7 +361,7 @@ static void __local_list_add_pending(struct bpf_lru *lru,
*(u32 *)((void *)node + lru->hash_offset) = hash;
node->cpu = cpu;
node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
- node->ref = 0;
+ bpf_lru_node_clear_ref(node);
list_add(&node->list, local_pending_list(loc_l));
}
@@ -422,7 +427,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
if (!list_empty(free_list)) {
node = list_first_entry(free_list, struct bpf_lru_node, list);
*(u32 *)((void *)node + lru->hash_offset) = hash;
- node->ref = 0;
+ bpf_lru_node_clear_ref(node);
__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
}
@@ -525,7 +530,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru,
}
node->type = BPF_LRU_LOCAL_LIST_T_FREE;
- node->ref = 0;
+ bpf_lru_node_clear_ref(node);
list_move(&node->list, local_free_list(loc_l));
raw_spin_unlock_irqrestore(&loc_l->lock, flags);
@@ -571,7 +576,7 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
node = (struct bpf_lru_node *)(buf + node_offset);
node->type = BPF_LRU_LIST_T_FREE;
- node->ref = 0;
+ bpf_lru_node_clear_ref(node);
list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
buf += elem_size;
}
@@ -597,7 +602,7 @@ again:
node = (struct bpf_lru_node *)(buf + node_offset);
node->cpu = cpu;
node->type = BPF_LRU_LIST_T_FREE;
- node->ref = 0;
+ bpf_lru_node_clear_ref(node);
list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
i++;
buf += elem_size;
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index 7d4f89b7c..08da78b59 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -66,11 +66,8 @@ struct bpf_lru {
static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
{
- /* ref is an approximation on access frequency. It does not
- * have to be very accurate. Hence, no protection is used.
- */
- if (!node->ref)
- node->ref = 1;
+ if (!READ_ONCE(node->ref))
+ WRITE_ONCE(node->ref, 1);
}
int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index c0ebb7080..55a61deb3 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -395,10 +395,9 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
}
css_task_iter_end(&it);
length = n;
- /* now sort & (if procs) strip out duplicates */
+ /* now sort & strip out duplicates (tgids or recycled thread PIDs) */
sort(array, length, sizeof(pid_t), cmppid, NULL);
- if (type == CGROUP_FILE_PROCS)
- length = pidlist_uniq(array, length);
+ length = pidlist_uniq(array, length);
l = cgroup_pidlist_find_create(cgrp, type);
if (!l) {
diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c
index b05f1dd58..313e66b8c 100644
--- a/kernel/cgroup/namespace.c
+++ b/kernel/cgroup/namespace.c
@@ -148,9 +148,3 @@ const struct proc_ns_operations cgroupns_operations = {
.install = cgroupns_install,
.owner = cgroupns_owner,
};
-
-static __init int cgroup_namespaces_init(void)
-{
- return 0;
-}
-subsys_initcall(cgroup_namespaces_init);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index c9ca190ec..34c09c3d3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1418,7 +1418,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
[CPUHP_HRTIMERS_PREPARE] = {
.name = "hrtimers:prepare",
.startup.single = hrtimers_prepare_cpu,
- .teardown.single = hrtimers_dead_cpu,
+ .teardown.single = NULL,
},
[CPUHP_SMPCFD_PREPARE] = {
.name = "smpcfd:prepare",
@@ -1485,6 +1485,12 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = NULL,
.teardown.single = smpcfd_dying_cpu,
},
+ [CPUHP_AP_HRTIMERS_DYING] = {
+ .name = "hrtimers:dying",
+ .startup.single = NULL,
+ .teardown.single = hrtimers_cpu_dying,
+ },
+
/* Entry state on starting. Interrupts enabled from here on. Transient
* state for synchronsization */
[CPUHP_AP_ONLINE] = {
diff --git a/kernel/cred.c b/kernel/cred.c
index a9f0f8b21..8c58f0f63 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -101,17 +101,17 @@ static void put_cred_rcu(struct rcu_head *rcu)
#ifdef CONFIG_DEBUG_CREDENTIALS
if (cred->magic != CRED_MAGIC_DEAD ||
- atomic_read(&cred->usage) != 0 ||
+ atomic_long_read(&cred->usage) != 0 ||
read_cred_subscribers(cred) != 0)
panic("CRED: put_cred_rcu() sees %p with"
- " mag %x, put %p, usage %d, subscr %d\n",
+ " mag %x, put %p, usage %ld, subscr %d\n",
cred, cred->magic, cred->put_addr,
- atomic_read(&cred->usage),
+ atomic_long_read(&cred->usage),
read_cred_subscribers(cred));
#else
- if (atomic_read(&cred->usage) != 0)
- panic("CRED: put_cred_rcu() sees %p with usage %d\n",
- cred, atomic_read(&cred->usage));
+ if (atomic_long_read(&cred->usage) != 0)
+ panic("CRED: put_cred_rcu() sees %p with usage %ld\n",
+ cred, atomic_long_read(&cred->usage));
#endif
security_cred_free(cred);
@@ -134,11 +134,11 @@ static void put_cred_rcu(struct rcu_head *rcu)
*/
void __put_cred(struct cred *cred)
{
- kdebug("__put_cred(%p{%d,%d})", cred,
- atomic_read(&cred->usage),
+ kdebug("__put_cred(%p{%ld,%d})", cred,
+ atomic_long_read(&cred->usage),
read_cred_subscribers(cred));
- BUG_ON(atomic_read(&cred->usage) != 0);
+ BUG_ON(atomic_long_read(&cred->usage) != 0);
#ifdef CONFIG_DEBUG_CREDENTIALS
BUG_ON(read_cred_subscribers(cred) != 0);
cred->magic = CRED_MAGIC_DEAD;
@@ -161,8 +161,8 @@ void exit_creds(struct task_struct *tsk)
{
struct cred *cred;
- kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred,
- atomic_read(&tsk->cred->usage),
+ kdebug("exit_creds(%u,%p,%p,{%ld,%d})", tsk->pid, tsk->real_cred, tsk->cred,
+ atomic_long_read(&tsk->cred->usage),
read_cred_subscribers(tsk->cred));
cred = (struct cred *) tsk->real_cred;
@@ -197,7 +197,7 @@ const struct cred *get_task_cred(struct task_struct *task)
do {
cred = __task_cred((task));
BUG_ON(!cred);
- } while (!atomic_inc_not_zero(&((struct cred *)cred)->usage));
+ } while (!atomic_long_inc_not_zero(&((struct cred *)cred)->usage));
rcu_read_unlock();
return cred;
@@ -215,7 +215,7 @@ struct cred *cred_alloc_blank(void)
if (!new)
return NULL;
- atomic_set(&new->usage, 1);
+ atomic_long_set(&new->usage, 1);
#ifdef CONFIG_DEBUG_CREDENTIALS
new->magic = CRED_MAGIC;
#endif
@@ -262,7 +262,7 @@ struct cred *prepare_creds(void)
memcpy(new, old, sizeof(struct cred));
new->non_rcu = 0;
- atomic_set(&new->usage, 1);
+ atomic_long_set(&new->usage, 1);
set_cred_subscribers(new, 0);
get_group_info(new->group_info);
get_uid(new->user);
@@ -338,8 +338,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
p->real_cred = get_cred(p->cred);
get_cred(p->cred);
alter_cred_subscribers(p->cred, 2);
- kdebug("share_creds(%p{%d,%d})",
- p->cred, atomic_read(&p->cred->usage),
+ kdebug("share_creds(%p{%ld,%d})",
+ p->cred, atomic_long_read(&p->cred->usage),
read_cred_subscribers(p->cred));
atomic_inc(&p->cred->user->processes);
return 0;
@@ -429,8 +429,8 @@ int commit_creds(struct cred *new)
struct task_struct *task = current;
const struct cred *old = task->real_cred;
- kdebug("commit_creds(%p{%d,%d})", new,
- atomic_read(&new->usage),
+ kdebug("commit_creds(%p{%ld,%d})", new,
+ atomic_long_read(&new->usage),
read_cred_subscribers(new));
BUG_ON(task->cred != old);
@@ -439,7 +439,7 @@ int commit_creds(struct cred *new)
validate_creds(old);
validate_creds(new);
#endif
- BUG_ON(atomic_read(&new->usage) < 1);
+ BUG_ON(atomic_long_read(&new->usage) < 1);
get_cred(new); /* we will require a ref for the subj creds too */
@@ -512,14 +512,14 @@ EXPORT_SYMBOL(commit_creds);
*/
void abort_creds(struct cred *new)
{
- kdebug("abort_creds(%p{%d,%d})", new,
- atomic_read(&new->usage),
+ kdebug("abort_creds(%p{%ld,%d})", new,
+ atomic_long_read(&new->usage),
read_cred_subscribers(new));
#ifdef CONFIG_DEBUG_CREDENTIALS
BUG_ON(read_cred_subscribers(new) != 0);
#endif
- BUG_ON(atomic_read(&new->usage) < 1);
+ BUG_ON(atomic_long_read(&new->usage) < 1);
put_cred(new);
}
EXPORT_SYMBOL(abort_creds);
@@ -535,8 +535,8 @@ const struct cred *override_creds(const struct cred *new)
{
const struct cred *old = current->cred;
- kdebug("override_creds(%p{%d,%d})", new,
- atomic_read(&new->usage),
+ kdebug("override_creds(%p{%ld,%d})", new,
+ atomic_long_read(&new->usage),
read_cred_subscribers(new));
validate_creds(old);
@@ -558,8 +558,8 @@ const struct cred *override_creds(const struct cred *new)
rcu_assign_pointer(current->cred, new);
alter_cred_subscribers(old, -1);
- kdebug("override_creds() = %p{%d,%d}", old,
- atomic_read(&old->usage),
+ kdebug("override_creds() = %p{%ld,%d}", old,
+ atomic_long_read(&old->usage),
read_cred_subscribers(old));
return old;
}
@@ -576,8 +576,8 @@ void revert_creds(const struct cred *old)
{
const struct cred *override = current->cred;
- kdebug("revert_creds(%p{%d,%d})", old,
- atomic_read(&old->usage),
+ kdebug("revert_creds(%p{%ld,%d})", old,
+ atomic_long_read(&old->usage),
read_cred_subscribers(old));
validate_creds(old);
@@ -637,7 +637,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
*new = *old;
new->non_rcu = 0;
- atomic_set(&new->usage, 1);
+ atomic_long_set(&new->usage, 1);
set_cred_subscribers(new, 0);
get_uid(new->user);
get_user_ns(new->user_ns);
@@ -760,8 +760,8 @@ static void dump_invalid_creds(const struct cred *cred, const char *label,
cred == tsk->cred ? "[eff]" : "");
printk(KERN_ERR "CRED: ->magic=%x, put_addr=%p\n",
cred->magic, cred->put_addr);
- printk(KERN_ERR "CRED: ->usage=%d, subscr=%d\n",
- atomic_read(&cred->usage),
+ printk(KERN_ERR "CRED: ->usage=%ld, subscr=%d\n",
+ atomic_long_read(&cred->usage),
read_cred_subscribers(cred));
printk(KERN_ERR "CRED: ->*uid = { %d,%d,%d,%d }\n",
from_kuid_munged(&init_user_ns, cred->uid),
@@ -833,9 +833,9 @@ EXPORT_SYMBOL(__validate_process_creds);
*/
void validate_creds_for_do_exit(struct task_struct *tsk)
{
- kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})",
+ kdebug("validate_creds_for_do_exit(%p,%p{%ld,%d})",
tsk->real_cred, tsk->cred,
- atomic_read(&tsk->cred->usage),
+ atomic_long_read(&tsk->cred->usage),
read_cred_subscribers(tsk->cred));
__validate_process_creds(tsk, __FILE__, __LINE__);
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 7c6cd00d0..c345a6e2f 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -963,7 +963,7 @@ static int device_dma_allocations(struct device *dev, struct dma_debug_entry **o
static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
{
struct device *dev = data;
- struct dma_debug_entry *uninitialized_var(entry);
+ struct dma_debug_entry *entry;
int count;
if (dma_debug_disabled())
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2bf4b6b10..1f215aa8f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1133,6 +1133,11 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
return 0;
}
+static int perf_mux_hrtimer_restart_ipi(void *arg)
+{
+ return perf_mux_hrtimer_restart(arg);
+}
+
void perf_pmu_disable(struct pmu *pmu)
{
int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -1705,28 +1710,34 @@ static inline void perf_event__state_init(struct perf_event *event)
PERF_EVENT_STATE_INACTIVE;
}
-static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
+static int __perf_event_read_size(u64 read_format, int nr_siblings)
{
int entry = sizeof(u64); /* value */
int size = 0;
int nr = 1;
- if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
size += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
size += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_ID)
+ if (read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_GROUP) {
+ if (read_format & PERF_FORMAT_LOST)
+ entry += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_GROUP) {
nr += nr_siblings;
size += sizeof(u64);
}
- size += entry * nr;
- event->read_size = size;
+ /*
+ * Since perf_event_validate_size() limits this to 16k and inhibits
+ * adding more siblings, this will never overflow.
+ */
+ return size + nr * entry;
}
static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
@@ -1767,8 +1778,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
*/
static void perf_event__header_size(struct perf_event *event)
{
- __perf_event_read_size(event,
- event->group_leader->nr_siblings);
+ event->read_size =
+ __perf_event_read_size(event->attr.read_format,
+ event->group_leader->nr_siblings);
__perf_event_header_size(event, event->attr.sample_type);
}
@@ -1799,23 +1811,44 @@ static void perf_event__id_header_size(struct perf_event *event)
event->id_header_size = size;
}
+/*
+ * Check that adding an event to the group does not result in anybody
+ * overflowing the 64k event limit imposed by the output buffer.
+ *
+ * Specifically, check that the read_size for the event does not exceed 16k,
+ * read_size being the one term that grows with groups size. Since read_size
+ * depends on per-event read_format, also (re)check the existing events.
+ *
+ * This leaves 48k for the constant size fields and things like callchains,
+ * branch stacks and register sets.
+ */
static bool perf_event_validate_size(struct perf_event *event)
{
- /*
- * The values computed here will be over-written when we actually
- * attach the event.
- */
- __perf_event_read_size(event, event->group_leader->nr_siblings + 1);
- __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
- perf_event__id_header_size(event);
+ struct perf_event *sibling, *group_leader = event->group_leader;
+
+ if (__perf_event_read_size(event->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
+
+ if (__perf_event_read_size(group_leader->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
/*
- * Sum the lot; should not exceed the 64k limit we have on records.
- * Conservative limit to allow for callchains and other variable fields.
+ * When creating a new group leader, group_leader->ctx is initialized
+ * after the size has been validated, but we cannot safely use
+ * for_each_sibling_event() until group_leader->ctx is set. A new group
+ * leader cannot have any siblings yet, so we can safely skip checking
+ * the non-existent siblings.
*/
- if (event->read_size + event->header_size +
- event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
- return false;
+ if (event == group_leader)
+ return true;
+
+ for_each_sibling_event(sibling, group_leader) {
+ if (__perf_event_read_size(sibling->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
+ }
return true;
}
@@ -1843,6 +1876,7 @@ static void perf_group_attach(struct perf_event *event)
list_add_tail(&event->sibling_list, &group_leader->sibling_list);
group_leader->nr_siblings++;
+ group_leader->group_generation++;
perf_event__header_size(group_leader);
@@ -1913,6 +1947,7 @@ static void perf_group_detach(struct perf_event *event)
if (event->group_leader != event) {
list_del_init(&event->sibling_list);
event->group_leader->nr_siblings--;
+ event->group_leader->group_generation++;
goto out;
}
@@ -4750,7 +4785,7 @@ static int __perf_read_group_add(struct perf_event *leader,
u64 read_format, u64 *values)
{
struct perf_event_context *ctx = leader->ctx;
- struct perf_event *sub;
+ struct perf_event *sub, *parent;
unsigned long flags;
int n = 1; /* skip @nr */
int ret;
@@ -4760,6 +4795,33 @@ static int __perf_read_group_add(struct perf_event *leader,
return ret;
raw_spin_lock_irqsave(&ctx->lock, flags);
+ /*
+ * Verify the grouping between the parent and child (inherited)
+ * events is still in tact.
+ *
+ * Specifically:
+ * - leader->ctx->lock pins leader->sibling_list
+ * - parent->child_mutex pins parent->child_list
+ * - parent->ctx->mutex pins parent->sibling_list
+ *
+ * Because parent->ctx != leader->ctx (and child_list nests inside
+ * ctx->mutex), group destruction is not atomic between children, also
+ * see perf_event_release_kernel(). Additionally, parent can grow the
+ * group.
+ *
+ * Therefore it is possible to have parent and child groups in a
+ * different configuration and summing over such a beast makes no sense
+ * what so ever.
+ *
+ * Reject this.
+ */
+ parent = leader->parent;
+ if (parent &&
+ (parent->group_generation != leader->group_generation ||
+ parent->nr_siblings != leader->nr_siblings)) {
+ ret = -ECHILD;
+ goto unlock;
+ }
/*
* Since we co-schedule groups, {enabled,running} times of siblings
@@ -4782,15 +4844,20 @@ static int __perf_read_group_add(struct perf_event *leader,
values[n++] += perf_event_count(leader);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&leader->lost_samples);
for_each_sibling_event(sub, leader) {
values[n++] += perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&sub->lost_samples);
}
+unlock:
raw_spin_unlock_irqrestore(&ctx->lock, flags);
- return 0;
+ return ret;
}
static int perf_read_group(struct perf_event *event,
@@ -4809,10 +4876,6 @@ static int perf_read_group(struct perf_event *event,
values[0] = 1 + leader->nr_siblings;
- /*
- * By locking the child_mutex of the leader we effectively
- * lock the child list of all siblings.. XXX explain how.
- */
mutex_lock(&leader->child_mutex);
ret = __perf_read_group_add(leader, read_format, values);
@@ -4843,7 +4906,7 @@ static int perf_read_one(struct perf_event *event,
u64 read_format, char __user *buf)
{
u64 enabled, running;
- u64 values[4];
+ u64 values[5];
int n = 0;
values[n++] = __perf_event_read_value(event, &enabled, &running);
@@ -4853,6 +4916,8 @@ static int perf_read_one(struct perf_event *event,
values[n++] = running;
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(event);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&event->lost_samples);
if (copy_to_user(buf, values, n * sizeof(u64)))
return -EFAULT;
@@ -6162,7 +6227,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
u64 enabled, u64 running)
{
u64 read_format = event->attr.read_format;
- u64 values[4];
+ u64 values[5];
int n = 0;
values[n++] = perf_event_count(event);
@@ -6176,6 +6241,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
}
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(event);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&event->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
}
@@ -6186,7 +6253,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
{
struct perf_event *leader = event->group_leader, *sub;
u64 read_format = event->attr.read_format;
- u64 values[5];
+ u64 values[6];
int n = 0;
values[n++] = 1 + leader->nr_siblings;
@@ -6204,6 +6271,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
values[n++] = perf_event_count(leader);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&leader->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
@@ -6217,6 +6286,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
values[n++] = perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&sub->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
}
@@ -9644,8 +9715,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
- cpu_function_call(cpu,
- (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
+ cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpuctx);
}
cpus_read_unlock();
mutex_unlock(&mux_interval_mutex);
@@ -10575,7 +10645,7 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event *group_leader = NULL, *output_event = NULL;
struct perf_event *event, *sibling;
struct perf_event_attr attr;
- struct perf_event_context *ctx, *uninitialized_var(gctx);
+ struct perf_event_context *ctx, *gctx;
struct file *event_file = NULL;
struct fd group = {NULL, 0};
struct task_struct *task = NULL;
@@ -11599,6 +11669,8 @@ static int inherit_group(struct perf_event *parent_event,
if (IS_ERR(child_ctr))
return PTR_ERR(child_ctr);
}
+ if (leader)
+ leader->group_generation = parent_event->group_generation;
return 0;
}
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 12f351b25..ddcbd03ec 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -165,8 +165,10 @@ __perf_output_begin(struct perf_output_handle *handle,
goto out;
if (unlikely(rb->paused)) {
- if (rb->nr_pages)
+ if (rb->nr_pages) {
local_inc(&rb->lost);
+ atomic64_inc(&event->lost_samples);
+ }
goto out;
}
@@ -249,6 +251,7 @@ __perf_output_begin(struct perf_output_handle *handle,
fail:
local_inc(&rb->lost);
+ atomic64_inc(&event->lost_samples);
perf_output_put_handle(handle);
out:
rcu_read_unlock();
@@ -639,6 +642,12 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
}
}
+ /*
+ * kcalloc_node() is unable to allocate buffer if the size is larger
+ * than: PAGE_SIZE << MAX_ORDER; directly bail out in this case.
+ */
+ if (get_order((unsigned long)nr_pages * sizeof(void *)) > MAX_ORDER)
+ return -ENOMEM;
rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL,
node);
if (!rb->aux_pages)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 24342bca1..72ae05d65 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1887,7 +1887,7 @@ static void handle_swbp(struct pt_regs *regs)
{
struct uprobe *uprobe;
unsigned long bp_vaddr;
- int uninitialized_var(is_swbp);
+ int is_swbp;
bp_vaddr = uprobe_get_swbp_addr(regs);
if (bp_vaddr == get_trampoline_vaddr())
diff --git a/kernel/exit.c b/kernel/exit.c
index 02360ec3b..0d1cca15e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -140,7 +140,7 @@ static void __exit_signal(struct task_struct *tsk)
struct signal_struct *sig = tsk->signal;
bool group_dead = thread_group_leader(tsk);
struct sighand_struct *sighand;
- struct tty_struct *uninitialized_var(tty);
+ struct tty_struct *tty;
u64 utime, stime;
sighand = rcu_dereference_check(tsk->sighand,
diff --git a/kernel/futex.c b/kernel/futex.c
index 3c67da9b8..ca2a2a894 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1398,7 +1398,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
{
int err;
- u32 uninitialized_var(curval);
+ u32 curval;
if (unlikely(should_fail_futex(true)))
return -EFAULT;
@@ -1569,7 +1569,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
*/
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
{
- u32 uninitialized_var(curval), newval;
+ u32 curval, newval;
struct task_struct *new_owner;
bool postunlock = false;
DEFINE_WAKE_Q(wake_q);
@@ -3083,7 +3083,7 @@ uaddr_faulted:
*/
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
- u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
+ u32 curval, uval, vpid = task_pid_vnr(current);
union futex_key key = FUTEX_KEY_INIT;
struct futex_hash_bucket *hb;
struct futex_q *top_waiter;
@@ -3558,7 +3558,7 @@ err_unlock:
static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
bool pi, bool pending_op)
{
- u32 uval, uninitialized_var(nval), mval;
+ u32 uval, nval, mval;
int err;
/* Futex address must be 32bit aligned */
@@ -3688,7 +3688,7 @@ static void exit_robust_list(struct task_struct *curr)
struct robust_list_head __user *head = curr->robust_list;
struct robust_list __user *entry, *next_entry, *pending;
unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
- unsigned int uninitialized_var(next_pi);
+ unsigned int next_pi;
unsigned long futex_offset;
int rc;
@@ -3987,7 +3987,7 @@ static void compat_exit_robust_list(struct task_struct *curr)
struct compat_robust_list_head __user *head = curr->compat_robust_list;
struct robust_list __user *entry, *next_entry, *pending;
unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
- unsigned int uninitialized_var(next_pi);
+ unsigned int next_pi;
compat_uptr_t uentry, next_uentry, upending;
compat_long_t futex_offset;
int rc;
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index e2999a070..4195e7ad1 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -537,21 +537,34 @@ EXPORT_SYMBOL_GPL(irq_setup_alt_chip);
void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
unsigned int clr, unsigned int set)
{
- unsigned int i = gc->irq_base;
+ unsigned int i, virq;
raw_spin_lock(&gc_lock);
list_del(&gc->list);
raw_spin_unlock(&gc_lock);
- for (; msk; msk >>= 1, i++) {
+ for (i = 0; msk; msk >>= 1, i++) {
if (!(msk & 0x01))
continue;
+ /*
+ * Interrupt domain based chips store the base hardware
+ * interrupt number in gc::irq_base. Otherwise gc::irq_base
+ * contains the base Linux interrupt number.
+ */
+ if (gc->domain) {
+ virq = irq_find_mapping(gc->domain, gc->irq_base + i);
+ if (!virq)
+ continue;
+ } else {
+ virq = gc->irq_base + i;
+ }
+
/* Remove handler first. That will mask the irq line */
- irq_set_handler(i, NULL);
- irq_set_chip(i, &no_irq_chip);
- irq_set_chip_data(i, NULL);
- irq_modify_status(i, clr, set);
+ irq_set_handler(virq, NULL);
+ irq_set_chip(virq, &no_irq_chip);
+ irq_set_chip_data(virq, NULL);
+ irq_modify_status(virq, clr, set);
}
}
EXPORT_SYMBOL_GPL(irq_remove_generic_chip);
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 8e586858b..d25edbb87 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -466,16 +466,16 @@ unsigned int irq_matrix_reserved(struct irq_matrix *m)
}
/**
- * irq_matrix_allocated - Get the number of allocated irqs on the local cpu
+ * irq_matrix_allocated - Get the number of allocated non-managed irqs on the local CPU
* @m: Pointer to the matrix to search
*
- * This returns number of allocated irqs
+ * This returns number of allocated non-managed interrupts.
*/
unsigned int irq_matrix_allocated(struct irq_matrix *m)
{
struct cpumap *cm = this_cpu_ptr(m->maps);
- return cm->allocated;
+ return cm->allocated - cm->managed_allocated;
}
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 6b3d7f721..3666d434a 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1020,6 +1020,7 @@ int crash_shrink_memory(unsigned long new_size)
start = crashk_res.start;
end = crashk_res.end;
old_size = (end == 0) ? 0 : end - start + 1;
+ new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN);
if (new_size >= old_size) {
ret = (new_size == old_size) ? 0 : -EINVAL;
goto unlock;
@@ -1031,9 +1032,7 @@ int crash_shrink_memory(unsigned long new_size)
goto unlock;
}
- start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
- end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
-
+ end = start + new_size;
crash_free_reserved_phys_range(end, crashk_res.end);
if ((start == end) && (crashk_res.parent != NULL))
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 4dc79f57a..46a6d1f7c 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1246,7 +1246,7 @@ static int noop_count(struct lock_list *entry, void *data)
static unsigned long __lockdep_count_forward_deps(struct lock_list *this)
{
unsigned long count = 0;
- struct lock_list *uninitialized_var(target_entry);
+ struct lock_list *target_entry;
__bfs_forwards(this, (void *)&count, noop_count, &target_entry);
@@ -1274,7 +1274,7 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
static unsigned long __lockdep_count_backward_deps(struct lock_list *this)
{
unsigned long count = 0;
- struct lock_list *uninitialized_var(target_entry);
+ struct lock_list *target_entry;
__bfs_backwards(this, (void *)&count, noop_count, &target_entry);
@@ -2662,7 +2662,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
{
int ret;
struct lock_list root;
- struct lock_list *uninitialized_var(target_entry);
+ struct lock_list *target_entry;
root.parent = NULL;
root.class = hlock_class(this);
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index 65a3b7e55..4fd05d9d5 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -439,7 +439,6 @@ retry:
} while (!time_after(jiffies, stress->timeout));
kfree(order);
- kfree(stress);
}
struct reorder_lock {
@@ -504,7 +503,6 @@ out:
list_for_each_entry_safe(ll, ln, &locks, link)
kfree(ll);
kfree(order);
- kfree(stress);
}
static void stress_one_work(struct work_struct *work)
@@ -525,8 +523,6 @@ static void stress_one_work(struct work_struct *work)
break;
}
} while (!time_after(jiffies, stress->timeout));
-
- kfree(stress);
}
#define STRESS_INORDER BIT(0)
@@ -537,15 +533,24 @@ static void stress_one_work(struct work_struct *work)
static int stress(int nlocks, int nthreads, unsigned int flags)
{
struct ww_mutex *locks;
- int n;
+ struct stress *stress_array;
+ int n, count;
locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL);
if (!locks)
return -ENOMEM;
+ stress_array = kmalloc_array(nthreads, sizeof(*stress_array),
+ GFP_KERNEL);
+ if (!stress_array) {
+ kfree(locks);
+ return -ENOMEM;
+ }
+
for (n = 0; n < nlocks; n++)
ww_mutex_init(&locks[n], &ww_class);
+ count = 0;
for (n = 0; nthreads; n++) {
struct stress *stress;
void (*fn)(struct work_struct *work);
@@ -569,9 +574,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags)
if (!fn)
continue;
- stress = kmalloc(sizeof(*stress), GFP_KERNEL);
- if (!stress)
- break;
+ stress = &stress_array[count++];
INIT_WORK(&stress->work, fn);
stress->locks = locks;
@@ -586,6 +589,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags)
for (n = 0; n < nlocks; n++)
ww_mutex_destroy(&locks[n]);
+ kfree(stress_array);
kfree(locks);
return 0;
diff --git a/kernel/module.c b/kernel/module.c
index 6ec0b2e0f..2ec961945 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2261,15 +2261,26 @@ static void free_module(struct module *mod)
void *__symbol_get(const char *symbol)
{
struct module *owner;
+ enum mod_license license;
const struct kernel_symbol *sym;
preempt_disable();
- sym = find_symbol(symbol, &owner, NULL, NULL, true, true);
- if (sym && strong_try_module_get(owner))
+ sym = find_symbol(symbol, &owner, NULL, &license, true, true);
+ if (!sym)
+ goto fail;
+ if (license != GPL_ONLY) {
+ pr_warn("failing symbol_get of non-GPLONLY symbol %s.\n",
+ symbol);
+ goto fail;
+ }
+ if (strong_try_module_get(owner))
sym = NULL;
preempt_enable();
return sym ? (void *)kernel_symbol_value(sym) : NULL;
+fail:
+ preempt_enable();
+ return NULL;
}
EXPORT_SYMBOL_GPL(__symbol_get);
diff --git a/kernel/padata.c b/kernel/padata.c
index 7f2b6d369..a9e14183e 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -121,7 +121,7 @@ int padata_do_parallel(struct padata_instance *pinst,
if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
goto out;
- err = -EBUSY;
+ err = -EBUSY;
if ((pinst->flags & PADATA_RESET))
goto out;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index f2635fc75..5abe4582d 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -2376,8 +2376,9 @@ static void *get_highmem_page_buffer(struct page *page,
pbe->copy_page = tmp;
} else {
/* Copy of the page will be stored in normal memory */
- kaddr = safe_pages_list;
- safe_pages_list = safe_pages_list->next;
+ kaddr = __get_safe_page(ca->gfp_mask);
+ if (!kaddr)
+ return ERR_PTR(-ENOMEM);
pbe->copy_page = virt_to_page(kaddr);
}
pbe->next = highmem_pblist;
@@ -2557,8 +2558,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
return ERR_PTR(-ENOMEM);
}
pbe->orig_address = page_address(page);
- pbe->address = safe_pages_list;
- safe_pages_list = safe_pages_list->next;
+ pbe->address = __get_safe_page(ca->gfp_mask);
+ if (!pbe->address)
+ return ERR_PTR(-ENOMEM);
pbe->next = restore_pblist;
restore_pblist = pbe;
return pbe->address;
@@ -2589,8 +2591,6 @@ int snapshot_write_next(struct snapshot_handle *handle)
if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
return 0;
- handle->sync_read = 1;
-
if (!handle->cur) {
if (!buffer)
/* This makes the buffer be freed by swsusp_free() */
@@ -2631,7 +2631,6 @@ int snapshot_write_next(struct snapshot_handle *handle)
memory_bm_position_reset(&orig_bm);
restore_pblist = NULL;
handle->buffer = get_buffer(&orig_bm, &ca);
- handle->sync_read = 0;
if (IS_ERR(handle->buffer))
return PTR_ERR(handle->buffer);
}
@@ -2643,9 +2642,8 @@ int snapshot_write_next(struct snapshot_handle *handle)
handle->buffer = get_buffer(&orig_bm, &ca);
if (IS_ERR(handle->buffer))
return PTR_ERR(handle->buffer);
- if (handle->buffer != buffer)
- handle->sync_read = 0;
}
+ handle->sync_read = (handle->buffer == buffer);
handle->cur++;
return PAGE_SIZE;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eb67f42fb..09f82c844 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8721,7 +8721,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
.sd = sd,
.dst_cpu = this_cpu,
.dst_rq = this_rq,
- .dst_grpmask = sched_group_span(sd->groups),
+ .dst_grpmask = group_balance_mask(sd->groups),
.idle = idle,
.loop_break = sched_nr_migrate_break,
.cpus = cpus,
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 44a17366c..4e3d149d6 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -53,17 +53,18 @@ __setup("hlt", cpu_idle_nopoll_setup);
static noinline int __cpuidle cpu_idle_poll(void)
{
+ trace_cpu_idle(0, smp_processor_id());
+ stop_critical_timings();
rcu_idle_enter();
- trace_cpu_idle_rcuidle(0, smp_processor_id());
local_irq_enable();
- stop_critical_timings();
while (!tif_need_resched() &&
- (cpu_idle_force_poll || tick_check_broadcast_expired()))
+ (cpu_idle_force_poll || tick_check_broadcast_expired()))
cpu_relax();
- start_critical_timings();
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+
rcu_idle_exit();
+ start_critical_timings();
+ trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
return 1;
}
@@ -90,7 +91,9 @@ void __cpuidle default_idle_call(void)
local_irq_enable();
} else {
stop_critical_timings();
+ rcu_idle_enter();
arch_cpu_idle();
+ rcu_idle_exit();
start_critical_timings();
}
}
@@ -148,7 +151,6 @@ static void cpuidle_idle_call(void)
if (cpuidle_not_available(drv, dev)) {
tick_nohz_idle_stop_tick();
- rcu_idle_enter();
default_idle_call();
goto exit_idle;
@@ -166,19 +168,15 @@ static void cpuidle_idle_call(void)
if (idle_should_enter_s2idle() || dev->use_deepest_state) {
if (idle_should_enter_s2idle()) {
- rcu_idle_enter();
entered_state = cpuidle_enter_s2idle(drv, dev);
if (entered_state > 0) {
local_irq_enable();
goto exit_idle;
}
-
- rcu_idle_exit();
}
tick_nohz_idle_stop_tick();
- rcu_idle_enter();
next_state = cpuidle_find_deepest_state(drv, dev);
call_cpuidle(drv, dev, next_state);
@@ -195,8 +193,6 @@ static void cpuidle_idle_call(void)
else
tick_nohz_idle_retain_tick();
- rcu_idle_enter();
-
entered_state = call_cpuidle(drv, dev, next_state);
/*
* Give the governor an opportunity to reflect on the outcome
@@ -212,8 +208,6 @@ exit_idle:
*/
if (WARN_ON_ONCE(irqs_disabled()))
local_irq_enable();
-
- rcu_idle_exit();
}
/*
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 9c6c3572b..394c66442 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1522,6 +1522,8 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
BUG_ON(idx >= MAX_RT_PRIO);
queue = array->queue + idx;
+ if (SCHED_WARN_ON(list_empty(queue)))
+ return NULL;
next = list_entry(queue->next, struct sched_rt_entity, run_list);
return next;
@@ -1535,7 +1537,8 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
do {
rt_se = pick_next_rt_entity(rq, rt_rq);
- BUG_ON(!rt_se);
+ if (unlikely(!rt_se))
+ return NULL;
rt_rq = group_rt_rq(rt_se);
} while (rt_rq);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 8512f06f0..bf74f43e4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1922,29 +1922,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
}
}
-int hrtimers_dead_cpu(unsigned int scpu)
+int hrtimers_cpu_dying(unsigned int dying_cpu)
{
struct hrtimer_cpu_base *old_base, *new_base;
- int i;
+ int i, ncpu = cpumask_first(cpu_active_mask);
- BUG_ON(cpu_online(scpu));
- tick_cancel_sched_timer(scpu);
+ tick_cancel_sched_timer(dying_cpu);
+
+ old_base = this_cpu_ptr(&hrtimer_bases);
+ new_base = &per_cpu(hrtimer_bases, ncpu);
- /*
- * this BH disable ensures that raise_softirq_irqoff() does
- * not wakeup ksoftirqd (and acquire the pi-lock) while
- * holding the cpu_base lock
- */
- local_bh_disable();
- local_irq_disable();
- old_base = &per_cpu(hrtimer_bases, scpu);
- new_base = this_cpu_ptr(&hrtimer_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
*/
- raw_spin_lock(&new_base->lock);
- raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock(&old_base->lock);
+ raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
migrate_hrtimer_list(&old_base->clock_base[i],
@@ -1955,15 +1948,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
* The migration might have changed the first expiring softirq
* timer on this CPU. Update it.
*/
- hrtimer_update_softirq_timer(new_base, false);
+ __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
+ /* Tell the other CPU to retrigger the next event */
+ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
- raw_spin_unlock(&old_base->lock);
raw_spin_unlock(&new_base->lock);
+ raw_spin_unlock(&old_base->lock);
- /* Check, if we got expired work to do */
- __hrtimer_peek_ahead_timers();
- local_irq_enable();
- local_bh_enable();
return 0;
}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 1234868b3..8768ce2c4 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -159,25 +159,30 @@ static struct k_itimer *posix_timer_by_id(timer_t id)
static int posix_timer_add(struct k_itimer *timer)
{
struct signal_struct *sig = current->signal;
- int first_free_id = sig->posix_timer_id;
struct hlist_head *head;
- int ret = -ENOENT;
+ unsigned int cnt, id;
- do {
+ /*
+ * FIXME: Replace this by a per signal struct xarray once there is
+ * a plan to handle the resulting CRIU regression gracefully.
+ */
+ for (cnt = 0; cnt <= INT_MAX; cnt++) {
spin_lock(&hash_lock);
- head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
- if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
+ id = sig->next_posix_timer_id;
+
+ /* Write the next ID back. Clamp it to the positive space */
+ sig->next_posix_timer_id = (id + 1) & INT_MAX;
+
+ head = &posix_timers_hashtable[hash(sig, id)];
+ if (!__posix_timers_find(head, sig, id)) {
hlist_add_head_rcu(&timer->t_hash, head);
- ret = sig->posix_timer_id;
+ spin_unlock(&hash_lock);
+ return id;
}
- if (++sig->posix_timer_id < 0)
- sig->posix_timer_id = 0;
- if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
- /* Loop over all possible ids completed */
- ret = -EAGAIN;
spin_unlock(&hash_lock);
- } while (ret == -ENOENT);
- return ret;
+ }
+ /* POSIX return code when no timer ID could be allocated */
+ return -EAGAIN;
}
static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 1cb13d636..b794470bb 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1274,7 +1274,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
#ifdef CONFIG_UPROBE_EVENTS
if (flags & TRACE_EVENT_FL_UPROBE)
err = bpf_get_uprobe_info(event, fd_type, buf,
- probe_offset,
+ probe_offset, probe_addr,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5c0463dbe..81f5c9c85 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1124,7 +1124,7 @@ struct ftrace_page {
struct ftrace_page *next;
struct dyn_ftrace *records;
int index;
- int size;
+ int order;
};
#define ENTRY_SIZE sizeof(struct dyn_ftrace)
@@ -2915,6 +2915,8 @@ static void ftrace_shutdown_sysctl(void)
static u64 ftrace_update_time;
unsigned long ftrace_update_tot_cnt;
+unsigned long ftrace_number_of_pages;
+unsigned long ftrace_number_of_groups;
static inline int ops_traces_mod(struct ftrace_ops *ops)
{
@@ -3039,8 +3041,11 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count)
goto again;
}
+ ftrace_number_of_pages += 1 << order;
+ ftrace_number_of_groups++;
+
cnt = (PAGE_SIZE << order) / ENTRY_SIZE;
- pg->size = cnt;
+ pg->order = order;
if (cnt > count)
cnt = count;
@@ -3048,12 +3053,27 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count)
return cnt;
}
+static void ftrace_free_pages(struct ftrace_page *pages)
+{
+ struct ftrace_page *pg = pages;
+
+ while (pg) {
+ if (pg->records) {
+ free_pages((unsigned long)pg->records, pg->order);
+ ftrace_number_of_pages -= 1 << pg->order;
+ }
+ pages = pg->next;
+ kfree(pg);
+ pg = pages;
+ ftrace_number_of_groups--;
+ }
+}
+
static struct ftrace_page *
ftrace_allocate_pages(unsigned long num_to_init)
{
struct ftrace_page *start_pg;
struct ftrace_page *pg;
- int order;
int cnt;
if (!num_to_init)
@@ -3087,14 +3107,7 @@ ftrace_allocate_pages(unsigned long num_to_init)
return start_pg;
free_pages:
- pg = start_pg;
- while (pg) {
- order = get_count_order(pg->size / ENTRIES_PER_PAGE);
- free_pages((unsigned long)pg->records, order);
- start_pg = pg->next;
- kfree(pg);
- pg = start_pg;
- }
+ ftrace_free_pages(start_pg);
pr_info("ftrace: FAILED to allocate memory for functions\n");
return NULL;
}
@@ -5586,9 +5599,11 @@ static int ftrace_process_locs(struct module *mod,
unsigned long *start,
unsigned long *end)
{
+ struct ftrace_page *pg_unuse = NULL;
struct ftrace_page *start_pg;
struct ftrace_page *pg;
struct dyn_ftrace *rec;
+ unsigned long skipped = 0;
unsigned long count;
unsigned long *p;
unsigned long addr;
@@ -5634,6 +5649,7 @@ static int ftrace_process_locs(struct module *mod,
p = start;
pg = start_pg;
while (p < end) {
+ unsigned long end_offset;
addr = ftrace_call_adjust(*p++);
/*
* Some architecture linkers will pad between
@@ -5641,10 +5657,13 @@ static int ftrace_process_locs(struct module *mod,
* object files to satisfy alignments.
* Skip any NULL pointers.
*/
- if (!addr)
+ if (!addr) {
+ skipped++;
continue;
+ }
- if (pg->index == pg->size) {
+ end_offset = (pg->index+1) * sizeof(pg->records[0]);
+ if (end_offset > PAGE_SIZE << pg->order) {
/* We should have allocated enough */
if (WARN_ON(!pg->next))
break;
@@ -5655,8 +5674,10 @@ static int ftrace_process_locs(struct module *mod,
rec->ip = addr;
}
- /* We should have used all pages */
- WARN_ON(pg->next);
+ if (pg->next) {
+ pg_unuse = pg->next;
+ pg->next = NULL;
+ }
/* Assign the last page to ftrace_pages */
ftrace_pages = pg;
@@ -5678,6 +5699,11 @@ static int ftrace_process_locs(struct module *mod,
out:
mutex_unlock(&ftrace_lock);
+ /* We should have used all pages unless we skipped some */
+ if (pg_unuse) {
+ WARN_ON(!skipped);
+ ftrace_free_pages(pg_unuse);
+ }
return ret;
}
@@ -5784,7 +5810,6 @@ void ftrace_release_mod(struct module *mod)
struct ftrace_page **last_pg;
struct ftrace_page *tmp_page = NULL;
struct ftrace_page *pg;
- int order;
mutex_lock(&ftrace_lock);
@@ -5835,10 +5860,13 @@ void ftrace_release_mod(struct module *mod)
/* Needs to be called outside of ftrace_lock */
clear_mod_from_hashes(pg);
- order = get_count_order(pg->size / ENTRIES_PER_PAGE);
- free_pages((unsigned long)pg->records, order);
+ if (pg->records) {
+ free_pages((unsigned long)pg->records, pg->order);
+ ftrace_number_of_pages -= 1 << pg->order;
+ }
tmp_page = pg->next;
kfree(pg);
+ ftrace_number_of_groups--;
}
}
@@ -6144,7 +6172,6 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr)
struct ftrace_mod_map *mod_map = NULL;
struct ftrace_init_func *func, *func_next;
struct list_head clear_hash;
- int order;
INIT_LIST_HEAD(&clear_hash);
@@ -6182,8 +6209,11 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr)
ftrace_update_tot_cnt--;
if (!pg->index) {
*last_pg = pg->next;
- order = get_count_order(pg->size / ENTRIES_PER_PAGE);
- free_pages((unsigned long)pg->records, order);
+ if (pg->records) {
+ free_pages((unsigned long)pg->records, pg->order);
+ ftrace_number_of_pages -= 1 << pg->order;
+ }
+ ftrace_number_of_groups--;
kfree(pg);
pg = container_of(last_pg, struct ftrace_page, next);
if (!(*last_pg))
@@ -6239,6 +6269,9 @@ void __init ftrace_init(void)
__start_mcount_loc,
__stop_mcount_loc);
+ pr_info("ftrace: allocated %ld pages with %ld groups\n",
+ ftrace_number_of_pages, ftrace_number_of_groups);
+
set_ftrace_early_filters();
return;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ba8b72f9c..b627bc820 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -493,6 +493,8 @@ struct ring_buffer_per_cpu {
unsigned long read_bytes;
u64 write_stamp;
u64 read_stamp;
+ /* pages removed since last reset */
+ unsigned long pages_removed;
/* ring buffer pages to update, > 0 to add, < 0 to remove */
long nr_pages_to_update;
struct list_head new_pages; /* new pages to add */
@@ -528,6 +530,7 @@ struct ring_buffer_iter {
struct buffer_page *head_page;
struct buffer_page *cache_reader_page;
unsigned long cache_read;
+ unsigned long cache_pages_removed;
u64 read_stamp;
};
@@ -561,7 +564,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
*/
int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
{
- struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
+ struct ring_buffer_per_cpu *cpu_buffer;
DEFINE_WAIT(wait);
struct rb_irq_work *work;
int ret = 0;
@@ -1341,6 +1344,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
free_buffer_page(bpage);
}
+ free_page((unsigned long)cpu_buffer->free_page);
+
kfree(cpu_buffer);
}
@@ -1514,6 +1519,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
to_remove = rb_list_head(to_remove)->next;
head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
}
+ /* Read iterators need to reset themselves when some pages removed */
+ cpu_buffer->pages_removed += nr_removed;
next_page = rb_list_head(to_remove)->next;
@@ -1535,12 +1542,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
cpu_buffer->head_page = list_entry(next_page,
struct buffer_page, list);
- /*
- * change read pointer to make sure any read iterators reset
- * themselves
- */
- cpu_buffer->read = 0;
-
/* pages are removed, resume tracing and then free the pages */
atomic_dec(&cpu_buffer->record_disabled);
raw_spin_unlock_irq(&cpu_buffer->reader_lock);
@@ -1754,6 +1755,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
err = -ENOMEM;
goto out_err;
}
+
+ cond_resched();
}
get_online_cpus();
@@ -3582,6 +3585,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
iter->cache_reader_page = iter->head_page;
iter->cache_read = cpu_buffer->read;
+ iter->cache_pages_removed = cpu_buffer->pages_removed;
if (iter->head)
iter->read_stamp = cpu_buffer->read_stamp;
@@ -4022,12 +4026,13 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
buffer = cpu_buffer->buffer;
/*
- * Check if someone performed a consuming read to
- * the buffer. A consuming read invalidates the iterator
- * and we need to reset the iterator in this case.
+ * Check if someone performed a consuming read to the buffer
+ * or removed some pages from the buffer. In these cases,
+ * iterator was invalidated and we need to reset it.
*/
if (unlikely(iter->cache_read != cpu_buffer->read ||
- iter->cache_reader_page != cpu_buffer->reader_page))
+ iter->cache_reader_page != cpu_buffer->reader_page ||
+ iter->cache_pages_removed != cpu_buffer->pages_removed))
rb_iter_reset(iter);
again:
@@ -4408,28 +4413,34 @@ unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
}
EXPORT_SYMBOL_GPL(ring_buffer_size);
+static void rb_clear_buffer_page(struct buffer_page *page)
+{
+ local_set(&page->write, 0);
+ local_set(&page->entries, 0);
+ rb_init_page(page->page);
+ page->read = 0;
+}
+
static void
rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
{
+ struct buffer_page *page;
+
rb_head_page_deactivate(cpu_buffer);
cpu_buffer->head_page
= list_entry(cpu_buffer->pages, struct buffer_page, list);
- local_set(&cpu_buffer->head_page->write, 0);
- local_set(&cpu_buffer->head_page->entries, 0);
- local_set(&cpu_buffer->head_page->page->commit, 0);
-
- cpu_buffer->head_page->read = 0;
+ rb_clear_buffer_page(cpu_buffer->head_page);
+ list_for_each_entry(page, cpu_buffer->pages, list) {
+ rb_clear_buffer_page(page);
+ }
cpu_buffer->tail_page = cpu_buffer->head_page;
cpu_buffer->commit_page = cpu_buffer->head_page;
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
INIT_LIST_HEAD(&cpu_buffer->new_pages);
- local_set(&cpu_buffer->reader_page->write, 0);
- local_set(&cpu_buffer->reader_page->entries, 0);
- local_set(&cpu_buffer->reader_page->page->commit, 0);
- cpu_buffer->reader_page->read = 0;
+ rb_clear_buffer_page(cpu_buffer->reader_page);
local_set(&cpu_buffer->entries_bytes, 0);
local_set(&cpu_buffer->overrun, 0);
@@ -4448,6 +4459,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->last_overrun = 0;
rb_head_page_activate(cpu_buffer);
+ cpu_buffer->pages_removed = 0;
}
/**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 98abff046..b43d681b0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2209,8 +2209,11 @@ void trace_buffered_event_enable(void)
for_each_tracing_cpu(cpu) {
page = alloc_pages_node(cpu_to_node(cpu),
GFP_KERNEL | __GFP_NORETRY, 0);
- if (!page)
- goto failed;
+ /* This is just an optimization and can handle failures */
+ if (!page) {
+ pr_err("Failed to allocate event buffer\n");
+ break;
+ }
event = page_address(page);
memset(event, 0, sizeof(*event));
@@ -2224,10 +2227,6 @@ void trace_buffered_event_enable(void)
WARN_ON_ONCE(1);
preempt_enable();
}
-
- return;
- failed:
- trace_buffered_event_disable();
}
static void enable_trace_buffered_event(void *data)
@@ -2262,11 +2261,9 @@ void trace_buffered_event_disable(void)
if (--trace_buffered_event_ref)
return;
- preempt_disable();
/* For each CPU, set the buffer as used. */
- smp_call_function_many(tracing_buffer_mask,
- disable_trace_buffered_event, NULL, 1);
- preempt_enable();
+ on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
+ NULL, true);
/* Wait for all current users to finish */
synchronize_sched();
@@ -2275,17 +2272,19 @@ void trace_buffered_event_disable(void)
free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
per_cpu(trace_buffered_event, cpu) = NULL;
}
+
/*
- * Make sure trace_buffered_event is NULL before clearing
- * trace_buffered_event_cnt.
+ * Wait for all CPUs that potentially started checking if they can use
+ * their event buffer only after the previous synchronize_rcu() call and
+ * they still read a valid pointer from trace_buffered_event. It must be
+ * ensured they don't see cleared trace_buffered_event_cnt else they
+ * could wrongly decide to use the pointed-to buffer which is now freed.
*/
- smp_wmb();
+ synchronize_rcu();
- preempt_disable();
- /* Do the work on each cpu */
- smp_call_function_many(tracing_buffer_mask,
- enable_trace_buffered_event, NULL, 1);
- preempt_enable();
+ /* For each CPU, relinquish the buffer */
+ on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
+ true);
}
static struct ring_buffer *temp_buffer;
@@ -3277,8 +3276,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
* will point to the same string as current_trace->name.
*/
mutex_lock(&trace_types_lock);
- if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
+ if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
+ /* Close iter->trace before switching to the new current tracer */
+ if (iter->trace->close)
+ iter->trace->close(iter);
*iter->trace = *tr->current_trace;
+ /* Reopen the new current tracer */
+ if (iter->trace->open)
+ iter->trace->open(iter);
+ }
mutex_unlock(&trace_types_lock);
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -5252,8 +5258,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
return ret;
#ifdef CONFIG_TRACER_MAX_TRACE
- if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
- !tr->current_trace->use_max_tr)
+ if (!tr->current_trace->use_max_tr)
goto out;
ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
@@ -7107,14 +7112,23 @@ static ssize_t
tracing_read_dyn_info(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- unsigned long *p = filp->private_data;
- char buf[64]; /* Not too big for a shallow stack */
+ ssize_t ret;
+ char *buf;
int r;
- r = scnprintf(buf, 63, "%ld", *p);
- buf[r++] = '\n';
+ /* 256 should be plenty to hold the amount needed */
+ buf = kmalloc(256, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
+ ftrace_update_tot_cnt,
+ ftrace_number_of_pages,
+ ftrace_number_of_groups);
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ kfree(buf);
+ return ret;
}
static const struct file_operations tracing_dyn_info_fops = {
@@ -8246,7 +8260,7 @@ static __init int tracer_init_tracefs(void)
#ifdef CONFIG_DYNAMIC_FTRACE
trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
- &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
+ NULL, &tracing_dyn_info_fops);
#endif
create_trace_instances(d_tracer);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 0923d1b18..f4d83b552 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -748,6 +748,8 @@ extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;
+extern unsigned long ftrace_number_of_pages;
+extern unsigned long ftrace_number_of_groups;
void ftrace_init_trace_array(struct trace_array *tr);
#else
static inline void ftrace_init_trace_array(struct trace_array *tr) { }
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d2f9146d1..ed39d3ec2 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -372,7 +372,6 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
{
struct trace_event_call *call = file->event_call;
struct trace_array *tr = file->tr;
- unsigned long file_flags = file->flags;
int ret = 0;
int disable;
@@ -396,6 +395,8 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
break;
disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
+ /* Disable use of trace_buffered_event */
+ trace_buffered_event_disable();
} else
disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
@@ -434,6 +435,8 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
if (atomic_inc_return(&file->sm_ref) > 1)
break;
set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
+ /* Enable use of trace_buffered_event */
+ trace_buffered_event_enable();
}
if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
@@ -473,15 +476,6 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
break;
}
- /* Enable or disable use of trace_buffered_event */
- if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
- (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
- if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
- trace_buffered_event_enable();
- else
- trace_buffered_event_disable();
- }
-
return ret;
}
@@ -2248,6 +2242,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
update_event_printk(call, map[i]);
}
}
+ cond_resched();
}
up_write(&trace_event_sem);
}
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 455cf41ae..e004daf8c 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -5787,13 +5787,16 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
if (get_named_trigger_data(trigger_data))
goto enable;
- if (has_hist_vars(hist_data))
- save_hist_vars(hist_data);
-
ret = create_actions(hist_data, file);
if (ret)
goto out_unreg;
+ if (has_hist_vars(hist_data) || hist_data->n_var_refs) {
+ ret = save_hist_vars(hist_data);
+ if (ret)
+ goto out_unreg;
+ }
+
ret = tracing_map_init(hist_data->map);
if (ret)
goto out_unreg;
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 98ea6d28d..0f36bb599 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -222,7 +222,8 @@ static void irqsoff_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
graph_trace_open(iter);
-
+ else
+ iter->private = NULL;
}
static void irqsoff_trace_close(struct trace_iterator *iter)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 11f4dbd95..8041bd5e4 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -287,6 +287,8 @@ static void wakeup_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
graph_trace_open(iter);
+ else
+ iter->private = NULL;
}
static void wakeup_trace_close(struct trace_iterator *iter)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 0da379b90..0e3bdd69f 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1159,7 +1159,7 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
const char **filename, u64 *probe_offset,
- bool perf_type_tracepoint)
+ u64 *probe_addr, bool perf_type_tracepoint)
{
const char *pevent = trace_event_name(event->tp_event);
const char *group = event->tp_event->class->system;
@@ -1176,6 +1176,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
: BPF_FD_TYPE_UPROBE;
*filename = tu->filename;
*probe_offset = tu->offset;
+ *probe_addr = 0;
return 0;
}
#endif /* CONFIG_PERF_EVENTS */
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 71381168d..f8e460b4a 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -114,14 +114,14 @@ static void watchdog_overflow_callback(struct perf_event *event,
/* Ensure the watchdog never gets throttled */
event->hw.interrupts = 0;
+ if (!watchdog_check_timestamp())
+ return;
+
if (__this_cpu_read(watchdog_nmi_touch) == true) {
__this_cpu_write(watchdog_nmi_touch, false);
return;
}
- if (!watchdog_check_timestamp())
- return;
-
/* check for a hardlockup
* This is done by making sure our timer interrupt
* is incrementing. The timer interrupt should have
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4ea2f7fd2..017939097 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -680,12 +680,17 @@ static void clear_work_data(struct work_struct *work)
set_work_data(work, WORK_STRUCT_NO_POOL, 0);
}
+static inline struct pool_workqueue *work_struct_pwq(unsigned long data)
+{
+ return (struct pool_workqueue *)(data & WORK_STRUCT_WQ_DATA_MASK);
+}
+
static struct pool_workqueue *get_work_pwq(struct work_struct *work)
{
unsigned long data = atomic_long_read(&work->data);
if (data & WORK_STRUCT_PWQ)
- return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
+ return work_struct_pwq(data);
else
return NULL;
}
@@ -713,8 +718,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
assert_rcu_or_pool_mutex();
if (data & WORK_STRUCT_PWQ)
- return ((struct pool_workqueue *)
- (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
+ return work_struct_pwq(data)->pool;
pool_id = data >> WORK_OFFQ_POOL_SHIFT;
if (pool_id == WORK_OFFQ_POOL_NONE)
@@ -735,8 +739,7 @@ static int get_work_pool_id(struct work_struct *work)
unsigned long data = atomic_long_read(&work->data);
if (data & WORK_STRUCT_PWQ)
- return ((struct pool_workqueue *)
- (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
+ return work_struct_pwq(data)->pool->id;
return data >> WORK_OFFQ_POOL_SHIFT;
}
@@ -5084,9 +5087,13 @@ static int workqueue_apply_unbound_cpumask(void)
list_for_each_entry(wq, &workqueues, list) {
if (!(wq->flags & WQ_UNBOUND))
continue;
+
/* creating multiple pwqs breaks ordering guarantee */
- if (wq->flags & __WQ_ORDERED)
- continue;
+ if (!list_empty(&wq->pwqs)) {
+ if (wq->flags & __WQ_ORDERED_EXPLICIT)
+ continue;
+ wq->flags &= ~__WQ_ORDERED;
+ }
ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
if (!ctx) {