1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
// SPDX-License-Identifier: GPL-2.0+
/*
* Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
*
* Copyright IBM Corporation, 2008
*
* Author: Paul E. McKenney <paulmck@linux.ibm.com>
*
* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU
*/
#include <linux/completion.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/rcupdate_wait.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/time.h>
#include <linux/cpu.h>
#include <linux/prefetch.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include "rcu.h"
/* Global control variables for rcupdate callback mechanism. */
struct rcu_ctrlblk {
struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
struct rcu_head **curtail; /* ->next pointer of last CB. */
unsigned long gp_seq; /* Grace-period counter. */
};
/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_ctrlblk = {
.donetail = &rcu_ctrlblk.rcucblist,
.curtail = &rcu_ctrlblk.rcucblist,
.gp_seq = 0 - 300UL,
};
void rcu_barrier(void)
{
wait_rcu_gp(call_rcu_hurry);
}
EXPORT_SYMBOL(rcu_barrier);
/* Record an rcu quiescent state. */
void rcu_qs(void)
{
unsigned long flags;
local_irq_save(flags);
if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
rcu_ctrlblk.donetail = rcu_ctrlblk.curtail;
raise_softirq_irqoff(RCU_SOFTIRQ);
}
WRITE_ONCE(rcu_ctrlblk.gp_seq, rcu_ctrlblk.gp_seq + 2);
local_irq_restore(flags);
}
/*
* Check to see if the scheduling-clock interrupt came from an extended
* quiescent state, and, if so, tell RCU about it. This function must
* be called from hardirq context. It is normally called from the
* scheduling-clock interrupt.
*/
void rcu_sched_clock_irq(int user)
{
if (user) {
rcu_qs();
} else if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
set_tsk_need_resched(current);
set_preempt_need_resched();
}
}
/*
* Reclaim the specified callback, either by invoking it for non-kfree cases or
* freeing it directly (for kfree). Return true if kfreeing, false otherwise.
*/
static inline bool rcu_reclaim_tiny(struct rcu_head *head)
{
rcu_callback_t f;
unsigned long offset = (unsigned long)head->func;
rcu_lock_acquire(&rcu_callback_map);
if (__is_kvfree_rcu_offset(offset)) {
trace_rcu_invoke_kvfree_callback("", head, offset);
kvfree((void *)head - offset);
rcu_lock_release(&rcu_callback_map);
return true;
}
trace_rcu_invoke_callback("", head);
f = head->func;
debug_rcu_head_callback(head);
WRITE_ONCE(head->func, (rcu_callback_t)0L);
f(head);
rcu_lock_release(&rcu_callback_map);
return false;
}
/* Invoke the RCU callbacks whose grace period has elapsed. */
static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
{
struct rcu_head *next, *list;
unsigned long flags;
/* Move the ready-to-invoke callbacks to a local list. */
local_irq_save(flags);
if (rcu_ctrlblk.donetail == &rcu_ctrlblk.rcucblist) {
/* No callbacks ready, so just leave. */
local_irq_restore(flags);
return;
}
list = rcu_ctrlblk.rcucblist;
rcu_ctrlblk.rcucblist = *rcu_ctrlblk.donetail;
*rcu_ctrlblk.donetail = NULL;
if (rcu_ctrlblk.curtail == rcu_ctrlblk.donetail)
rcu_ctrlblk.curtail = &rcu_ctrlblk.rcucblist;
rcu_ctrlblk.donetail = &rcu_ctrlblk.rcucblist;
local_irq_restore(flags);
/* Invoke the callbacks on the local list. */
while (list) {
next = list->next;
prefetch(next);
debug_rcu_head_unqueue(list);
local_bh_disable();
rcu_reclaim_tiny(list);
local_bh_enable();
list = next;
}
}
/*
* Wait for a grace period to elapse. But it is illegal to invoke
* synchronize_rcu() from within an RCU read-side critical section.
* Therefore, any legal call to synchronize_rcu() is a quiescent state,
* and so on a UP system, synchronize_rcu() need do nothing, other than
* let the polled APIs know that another grace period elapsed.
*
* (But Lai Jiangshan points out the benefits of doing might_sleep()
* to reduce latency.)
*
* Cool, huh? (Due to Josh Triplett.)
*/
void synchronize_rcu(void)
{
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_rcu() in RCU read-side critical section");
WRITE_ONCE(rcu_ctrlblk.gp_seq, rcu_ctrlblk.gp_seq + 2);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
static void tiny_rcu_leak_callback(struct rcu_head *rhp)
{
}
/*
* Post an RCU callback to be invoked after the end of an RCU grace
* period. But since we have but one CPU, that would be after any
* quiescent state.
*/
void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
static atomic_t doublefrees;
unsigned long flags;
if (debug_rcu_head_queue(head)) {
if (atomic_inc_return(&doublefrees) < 4) {
pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func);
mem_dump_obj(head);
}
if (!__is_kvfree_rcu_offset((unsigned long)head->func))
WRITE_ONCE(head->func, tiny_rcu_leak_callback);
return;
}
head->func = func;
head->next = NULL;
local_irq_save(flags);
*rcu_ctrlblk.curtail = head;
rcu_ctrlblk.curtail = &head->next;
local_irq_restore(flags);
if (unlikely(is_idle_task(current))) {
/* force scheduling for rcu_qs() */
resched_cpu(0);
}
}
EXPORT_SYMBOL_GPL(call_rcu);
/*
* Store a grace-period-counter "cookie". For more information,
* see the Tree RCU header comment.
*/
void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
{
rgosp->rgos_norm = RCU_GET_STATE_COMPLETED;
}
EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full);
/*
* Return a grace-period-counter "cookie". For more information,
* see the Tree RCU header comment.
*/
unsigned long get_state_synchronize_rcu(void)
{
return READ_ONCE(rcu_ctrlblk.gp_seq);
}
EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
/*
* Return a grace-period-counter "cookie" and ensure that a future grace
* period completes. For more information, see the Tree RCU header comment.
*/
unsigned long start_poll_synchronize_rcu(void)
{
unsigned long gp_seq = get_state_synchronize_rcu();
if (unlikely(is_idle_task(current))) {
/* force scheduling for rcu_qs() */
resched_cpu(0);
}
return gp_seq;
}
EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
/*
* Return true if the grace period corresponding to oldstate has completed
* and false otherwise. For more information, see the Tree RCU header
* comment.
*/
bool poll_state_synchronize_rcu(unsigned long oldstate)
{
return oldstate == RCU_GET_STATE_COMPLETED || READ_ONCE(rcu_ctrlblk.gp_seq) != oldstate;
}
EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
if (head)
kasan_record_aux_stack_noalloc(ptr);
__kvfree_call_rcu(head, ptr);
}
EXPORT_SYMBOL_GPL(kvfree_call_rcu);
#endif
void __init rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
rcu_early_boot_tests();
tasks_cblist_init_generic();
}
|