#ifndef JEMALLOC_INTERNAL_PA_H
#define JEMALLOC_INTERNAL_PA_H

#include "jemalloc/internal/base.h"
#include "jemalloc/internal/decay.h"
#include "jemalloc/internal/ecache.h"
#include "jemalloc/internal/edata_cache.h"
#include "jemalloc/internal/emap.h"
#include "jemalloc/internal/hpa.h"
#include "jemalloc/internal/lockedint.h"
#include "jemalloc/internal/pac.h"
#include "jemalloc/internal/pai.h"
#include "jemalloc/internal/sec.h"

/*
 * The page allocator; responsible for acquiring pages of memory for
 * allocations.  It picks the implementation of the page allocator interface
 * (i.e. a pai_t) to handle a given page-level allocation request.  The
 * available implementations are the PAC ("page allocator classic") and, when
 * enabled, the HPA ("hugepage allocator"), fronted by a small extent cache.
 */

typedef struct pa_central_s pa_central_t;
struct pa_central_s {
	hpa_central_t hpa;
};

/*
 * The stats for a particular pa_shard.  Because of the way the ctl module
 * handles stats epoch data collection (it has its own arena_stats, and merges
 * the stats from each arena into it), this needs to live in the arena_stats_t;
 * hence we define it here and let the pa_shard have a pointer (rather than the
 * more natural approach of just embedding it in the pa_shard itself).
 *
 * We follow the arena_stats_t approach of marking the derived fields.  These
 * are the ones that are not maintained on their own; instead, their values are
 * derived during those stats merges.
 */
typedef struct pa_shard_stats_s pa_shard_stats_t;
struct pa_shard_stats_s {
	/* Number of edata_t structs allocated by base, but not being used. */
	size_t edata_avail; /* Derived. */
	/*
	 * Stats specific to the PAC.  For now, these are the only stats that
	 * exist, but there will eventually be other page allocators.  Things
	 * like edata_avail make sense in a cross-PA sense, but things like
	 * npurges don't.
	 */
	pac_stats_t pac_stats;
};
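
/*
 * Illustrative sketch only (not part of the API): the embedding pattern
 * described above, with a hypothetical container standing in for
 * arena_stats_t.  The shard itself just keeps the stats / stats_mtx pointers
 * it was handed at pa_shard_init time.
 */
#if 0
typedef struct example_merged_stats_s {
	malloc_mutex_t mtx;	/* Becomes the shard's stats_mtx. */
	/* ... other merged per-arena stats ... */
	pa_shard_stats_t pa_shard_stats;	/* Passed by address to pa_shard_init. */
} example_merged_stats_t;
#endif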

/*
 * The local allocator handle.  Keeps the state necessary to satisfy page-sized
 * allocations.
 *
 * The contents are mostly internal to the PA module.  The key exception is that
 * arena decay code is allowed to grab pointers to the dirty and muzzy ecaches'
 * decay_ts for a couple of queries, passing them back to a PA function, or
 * acquiring decay.mtx and looking at decay.purging.  The reasoning is that,
 * while PA decides what and how to purge, the arena code decides when and where
 * (e.g. on what thread).  It's allowed to use the presence of another purger to
 * decide.
 * (The background thread code also touches some other decay internals, but
 * that's not fundamental; it's just an artifact of a partial refactoring, and
 * its accesses could be straightforwardly moved inside the decay module).
 */
typedef struct pa_shard_s pa_shard_t;
struct pa_shard_s {
	/* The central PA this shard is associated with. */
	pa_central_t *central;

	/*
	 * Number of pages in active extents.
	 *
	 * Synchronization: atomic.
	 */
	atomic_zu_t nactive;

	/*
	 * Whether or not we should prefer the hugepage allocator.  Atomic since
	 * it may be concurrently modified by a thread setting extent hooks.
	 * Note that we may still do HPA operations in this arena even so: if
	 * use_hpa is switched from true to false, allocations that came from
	 * the HPA are still freed back to it.
	 */
	atomic_b_t use_hpa;

	/*
	 * If we never used the HPA to begin with, it wasn't initialized, and so
	 * we shouldn't try to e.g. acquire its mutexes during fork.  This
	 * tracks that knowledge.
	 */
	bool ever_used_hpa;

	/* Allocates from a PAC. */
	pac_t pac;

	/*
	 * We place a small extent cache in front of the HPA, since we intend
	 * these configurations to use many fewer arenas, and therefore have a
	 * higher risk of hot locks.
	 */
	sec_t hpa_sec;
	hpa_shard_t hpa_shard;

	/* The source of edata_t objects. */
	edata_cache_t edata_cache;

	unsigned ind;

	malloc_mutex_t *stats_mtx;
	pa_shard_stats_t *stats;

	/* The emap this shard is tied to. */
	emap_t *emap;

	/* The base from which we get the ehooks and allocate metadata. */
	base_t *base;
};

/*
 * Returns true if there's no time-based muzzy decay work to do: the muzzy
 * ecache is empty and the muzzy decay time is not a positive interval.
 */
static inline bool
pa_shard_dont_decay_muzzy(pa_shard_t *shard) {
	return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 &&
	    pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0;
}

/* Returns the extent hooks of the base this shard allocates from. */
static inline ehooks_t *
pa_shard_ehooks_get(pa_shard_t *shard) {
	return base_ehooks_get(shard->base);
}

/* Returns true on error. */
bool pa_central_init(pa_central_t *central, base_t *base, bool hpa,
    hpa_hooks_t *hpa_hooks);

/* Returns true on error. */
bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central,
    emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats,
    malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold,
    ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms);
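
/*
 * Illustrative sketch only, not part of the real boot path: the expected
 * ordering of the two init calls above, both of which return true on error.
 * The wrapper name and the concrete threshold/decay values are hypothetical
 * example values; real callers (the arena boot code) supply their own and may
 * enable the HPA at central-init time.
 */
#if 0
static bool
example_pa_boot(tsdn_t *tsdn, pa_central_t *central, pa_shard_t *shard,
    emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats,
    malloc_mutex_t *stats_mtx, nstime_t *cur_time) {
	/* HPA disabled here, so no hooks are passed (assumed acceptable). */
	if (pa_central_init(central, base, /* hpa */ false,
	    /* hpa_hooks */ NULL)) {
		return true;
	}
	return pa_shard_init(tsdn, shard, central, emap, base, ind, stats,
	    stats_mtx, cur_time, /* oversize_threshold */ 8 << 20,
	    /* dirty_decay_ms */ 10 * 1000, /* muzzy_decay_ms */ 0);
}
#endif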

/*
 * This isn't exposed to users; we allow late enablement of the HPA shard so
 * that we can boot without worrying about the HPA, then turn it on in a0.
 */
bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard,
    const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts);

/*
 * We stop using the HPA when custom extent hooks are installed, but still
 * redirect deallocations to it.
 */
void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard);

/*
 * This does the PA-specific parts of arena reset (i.e. freeing all active
 * allocations).
 */
void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard);

/*
 * Destroy all the remaining retained extents.  Should only be called after
 * decaying all active, dirty, and muzzy extents to the retained state, as the
 * last step in destroying the shard.
 */
void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard);
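
/*
 * Illustrative sketch only: the teardown ordering implied by the comments
 * above.  The intermediate step that decays everything down to the retained
 * state lives outside this header and is only gestured at here; the wrapper
 * name is hypothetical.
 */
#if 0
static void
example_pa_teardown(tsdn_t *tsdn, pa_shard_t *shard) {
	/* Free all active allocations tracked by the shard. */
	pa_shard_reset(tsdn, shard);
	/* ... decay/purge all dirty and muzzy extents to retained (not shown) ... */
	/* Last step: release the remaining retained extents. */
	pa_shard_destroy(tsdn, shard);
}
#endif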

/* Gets an edata for the given allocation. */
edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size,
    size_t alignment, bool slab, szind_t szind, bool zero, bool guarded,
    bool *deferred_work_generated);
/* Returns true on error, in which case nothing changed. */
bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
    size_t new_size, szind_t szind, bool zero, bool *deferred_work_generated);
/*
 * The same (returns true on error, in which case nothing changed).  Whether
 * the shrink produced deferred work (e.g. newly dirty pages that may need
 * purging) is reported via *deferred_work_generated.
 */
bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
    size_t new_size, szind_t szind, bool *deferred_work_generated);
/*
 * Frees the given edata back to the PA.  Whether the deallocation produced
 * deferred work (e.g. new dirty pages that may need purging) is reported via
 * *deferred_work_generated.  (We could make that the return value, but an
 * out-parameter is more consistent with the shrink pathway, which reserves the
 * return value for error reporting.)
 */
void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
    bool *deferred_work_generated);
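
/*
 * Illustrative sketch only: a minimal allocate/free round trip through
 * pa_alloc and pa_dalloc.  The function name is hypothetical, the szind
 * argument is assumed to be a valid size class index computed by the caller,
 * and PAGE is jemalloc's page size; deferred_work_generated tells the caller
 * whether deferred work (e.g. purging) should be scheduled afterwards.
 */
#if 0
static void
example_pa_alloc_dalloc(tsdn_t *tsdn, pa_shard_t *shard, szind_t szind) {
	bool deferred_work_generated = false;
	/* pa_alloc returns NULL if the allocation cannot be satisfied. */
	edata_t *edata = pa_alloc(tsdn, shard, PAGE, /* alignment */ PAGE,
	    /* slab */ false, szind, /* zero */ false, /* guarded */ false,
	    &deferred_work_generated);
	if (edata == NULL) {
		return;
	}
	pa_dalloc(tsdn, shard, edata, &deferred_work_generated);
}
#endif
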
bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state,
    ssize_t decay_ms, pac_purge_eagerness_t eagerness);
ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state);

/*
 * Do deferred work on this PA shard.
 *
 * Morally, this should do both PAC decay and the HPA deferred work.  For now,
 * though, the arena, background thread, and PAC modules are tightly interwoven
 * in a way that's tricky to extricate, so we only do the HPA-specific parts.
 */
void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
    bool deferral_allowed);
void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
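
/*
 * Illustrative sketch only: how a background worker might drive the calls
 * above.  The function name is hypothetical, and treating the return value of
 * pa_shard_time_until_deferred_work as a sleep hint in nanoseconds is the
 * caller's assumption, not something this header promises.
 */
#if 0
static uint64_t
example_pa_background_step(tsdn_t *tsdn, pa_shard_t *shard) {
	/* Let the shard defer work (e.g. purging) to this worker. */
	pa_shard_set_deferral_allowed(tsdn, shard, true);
	/* Perform whatever deferred work is currently pending. */
	pa_shard_do_deferred_work(tsdn, shard);
	/* Ask how long the worker may wait before checking again. */
	return pa_shard_time_until_deferred_work(tsdn, shard);
}
#endif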

/******************************************************************************/
/*
 * Various bits of "boring" functionality that are still part of this module,
 * but that we relegate to pa_extra.c, to keep the core logic in pa.c as
 * readable as possible.
 */

/*
 * These fork phases are synchronized with the arena fork phase numbering to
 * make it easy to keep straight. That's why there's no prefork1.
 */
void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard);
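
/*
 * Illustrative sketch only: the bracketing around fork(2) that these hooks
 * expect.  The wrapper name is hypothetical; real callers iterate over every
 * shard and interleave these with the prefork hooks of other modules, in the
 * shared arena phase order.
 */
#if 0
static void
example_pa_prefork_all(tsdn_t *tsdn, pa_shard_t *shard) {
	/* Acquire the shard's mutexes, in phase order, before fork(2). */
	pa_shard_prefork0(tsdn, shard);
	pa_shard_prefork2(tsdn, shard);
	pa_shard_prefork3(tsdn, shard);
	pa_shard_prefork4(tsdn, shard);
	pa_shard_prefork5(tsdn, shard);
	/*
	 * After fork(2), the parent calls pa_shard_postfork_parent() and the
	 * child calls pa_shard_postfork_child() to release/reinit them.
	 */
}
#endif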

void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive,
    size_t *ndirty, size_t *nmuzzy);

void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
    pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
    hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out,
    size_t *resident);

/*
 * Reads the PA-owned mutex stats into the output stats array, at the
 * appropriate positions.  Morally, these stats should really live in
 * pa_shard_stats_t, but the indices are baked into the various mutex prof
 * macros; moving them there would be a good cleanup at some point.
 */
void pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
    mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]);

#endif /* JEMALLOC_INTERNAL_PA_H */