/* epoch.c - epoch based memory reclamation */
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
 * Copyright 2018-2022 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in the file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
 */

/** @file epoch.c
 *
 * Implementation of epoch based memory reclamation, in principle
 * similar to the algorithm presented in
 * https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf
 *
 * Not completely lock-free at the moment.
 *
 * The usual problem with epoch based memory reclamation is also still
 * present: a thread that gets stuck while actively observing an epoch
 * prevents managed objects (in our case connections and operations)
 * from being freed, potentially running the process out of memory.
 */
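
/*
 * A minimal sketch of the intended usage, assuming a hypothetical shared
 * object "obj" and a matching dispose_cb "obj_destroy" (neither is part of
 * this file):
 *
 *     epoch_t epoch = epoch_join();
 *     ... read objects managed by this module ...
 *     epoch_leave( epoch );
 *
 * A writer that has unlinked an object, so that no new readers can reach it,
 * hands it over for deferred disposal:
 *
 *     epoch_append( obj, obj_destroy );
 *
 * The callback then runs once no thread that could still have observed the
 * object remains in its epoch.
 */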

#include "portable.h"

#include "lload.h"
#include <epoch.h>

/*
 * Number of epoch slots we cycle through. Has to be >= 3 so that an epoch's
 * previous and next slots never coincide.
 */
#define EPOCH_MASK ( 1 << 2 )
#define EPOCH_PREV(epoch) ( ( (epoch) + EPOCH_MASK - 1 ) % EPOCH_MASK )
#define EPOCH_NEXT(epoch) ( ( (epoch) + 1 ) % EPOCH_MASK )
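
/*
 * For illustration: with EPOCH_MASK == 4 the epochs cycle
 * 0 -> 1 -> 2 -> 3 -> 0, so EPOCH_PREV(0) == 3 and EPOCH_NEXT(3) == 0.
 */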

struct pending_ref {
    void *object;
    dispose_cb *dispose;
    struct pending_ref *next;
};

ldap_pvt_thread_rdwr_t epoch_mutex;

static epoch_t current_epoch;
static uintptr_t epoch_threads[EPOCH_MASK];
static struct pending_ref *references[EPOCH_MASK];

void
epoch_init( void )
{
    epoch_t epoch;

    current_epoch = 0;
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_init( &epoch_mutex );
}

void
epoch_shutdown( void )
{
    epoch_t epoch;
    struct pending_ref *old, *next;

    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
    }

    /*
     * Even with the work in epoch_leave(), shutdown code doesn't currently
     * observe any epoch, so there might still be references left to free.
     */
    epoch = EPOCH_PREV(current_epoch);
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    epoch = current_epoch;
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    /* No references should exist anywhere now */
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_destroy( &epoch_mutex );
}

epoch_t
epoch_join( void )
{
    epoch_t epoch;
    struct pending_ref *old, *ref = NULL;

retry:
    /* TODO: make this completely lock-free */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    epoch = current_epoch;
    __atomic_add_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL );
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    if ( __atomic_load_n(
                 &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_ACQUIRE ) ) {
        return epoch;
    }

    __atomic_exchange(
            &references[EPOCH_PREV(epoch)], &ref, &ref, __ATOMIC_ACQ_REL );

    Debug( LDAP_DEBUG_TRACE, "epoch_join: "
            "advancing epoch to %zu with %s objects to free\n",
            EPOCH_NEXT(epoch), ref ? "some" : "no" );

    ldap_pvt_thread_rdwr_wlock( &epoch_mutex );
    current_epoch = EPOCH_NEXT(epoch);
    ldap_pvt_thread_rdwr_wunlock( &epoch_mutex );

    if ( !ref ) {
        return epoch;
    }

    /*
     * The references claimed above are now safe to free outside of any epoch
     * and we don't want to make the current epoch last any longer than
     * necessary.
     *
     * Looks like there might be fairness issues in massively parallel
     * environments but they haven't been observed on 32-core machines.
     */
    epoch_leave( epoch );

    for ( old = ref; old; old = ref ) {
        ref = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    goto retry;
}
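
/*
 * To illustrate the invariant epoch_join() relies on (a sketch, using the
 * default EPOCH_MASK == 4): if current_epoch == 2, the epoch is only advanced
 * to 3 once epoch_threads[1] has dropped to zero, at which point no active
 * thread can still hold a reference retired while epoch 1 was current, so
 * references[1] is claimed and disposed of.
 */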

void
epoch_leave( epoch_t epoch )
{
    struct pending_ref *p, *next, *old_refs = NULL, *current_refs = NULL;

    /* Are there other threads observing our epoch? */
    if ( __atomic_sub_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL ) ) {
        return;
    }

    /*
     * Optimisation for the case when we're mostly idle. Otherwise we won't
     * release resources until another thread comes by and joins the epoch
     * (twice), and there is no telling how soon (or late) that might happen.
     *
     * NB. There is no limit to the number of threads executing the following
     * code in parallel.
     */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    /*
     * Anything could happen between the subtract and the lock being acquired
     * above, so check again. But once we hold this lock (and confirm no more
     * threads still observe either prospective epoch), noone will be able to
     * finish epoch_join until we've released epoch_mutex since it holds that:
     *
     * epoch_threads[EPOCH_PREV(current_epoch)] == 0
     *
     * and that leads epoch_join() to acquire a write lock on &epoch_mutex.
     */
    if ( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) ) {
        /* Epoch counter has run full circle */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    } else if ( epoch == current_epoch ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (older) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange( &references[EPOCH_PREV(epoch)], &old_refs,
                &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[epoch], &current_refs, &current_refs,
                __ATOMIC_ACQ_REL );
    } else if ( epoch == EPOCH_PREV(current_epoch) ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_NEXT(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (newer) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange(
                &references[epoch], &old_refs, &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[EPOCH_NEXT(epoch)], &current_refs,
                &current_refs, __ATOMIC_ACQ_REL );
    }
    /*
     * Otherwise, current_epoch has already moved far enough that no
     * references remain for us to free.
     */
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    /*
     * Trigger a memory-independent read fence to make sure we're reading the
     * state after all threads actually finished - which might have happened
     * after we acquired epoch_mutex so ldap_pvt_thread_rdwr_rlock would not
     * catch everything.
     *
     * TODO: confirm the below:
     * It might be that the tests and exchanges above only enforce a fence for
     * the locations affected, so we could still read stale memory for
     * unrelated locations. At least that's the only explanation I've been able
     * to establish for repeated crashes that seem to have gone away with this
     * fence in place.
     *
     * But then that's contrary to the second example in the Acquire/Release
     * section here:
     * https://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync
     */
    __atomic_thread_fence( __ATOMIC_ACQUIRE );

    for ( p = old_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }

    for ( p = current_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }
}

/*
 * Add the object to the "current global epoch", not the epoch our thread
 * entered.
 */
void
epoch_append( void *ptr, dispose_cb *cb )
{
    struct pending_ref *new;
    epoch_t epoch = __atomic_load_n( &current_epoch, __ATOMIC_ACQUIRE );

    /*
     * Note that the following assertion would not be appropriate here:
     * assert( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) );
     *
     * We might be a thread lagging behind in the "previous epoch" with no
     * other threads executing at all.
     */

    new = ch_malloc( sizeof(struct pending_ref) );
    new->object = ptr;
    new->dispose = cb;
    new->next = __atomic_load_n( &references[epoch], __ATOMIC_ACQUIRE );

    while ( !__atomic_compare_exchange( &references[epoch], &new->next, &new, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) )
        /* iterate until we succeed */;
}
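
/*
 * A dispose_cb receives the object back as a void pointer. An illustrative,
 * purely hypothetical callback, to show the expected shape:
 *
 *     static void
 *     my_destroy( void *arg )
 *     {
 *         my_object_t *obj = arg;
 *         ... release whatever obj owns ...
 *         ch_free( obj );
 *     }
 *
 *     epoch_append( obj, my_destroy );
 */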

int
acquire_ref( uintptr_t *refp )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /*
     * If we just incremented the refcnt and only checked for zero afterwards,
     * another thread might falsely believe the object was going to stick
     * around.
     *
     * And checking whether the object is still dead at disposal time might
     * not be able to distinguish it from the object being freed in a later
     * epoch.
     */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt + 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt + 1 );

    return refcnt;
}

int
try_release_ref( uintptr_t *refp, void *object, dispose_cb *cb )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /* We promise the caller that we won't decrease refcnt below 0 */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt - 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt - 1 );

    if ( !new_refcnt ) {
        epoch_append( object, cb );
    }

    return refcnt;
}
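
/*
 * Putting the pieces together, a sketch of the intended pairing (the
 * structure member and callback names are hypothetical, only the functions
 * are real):
 *
 *     epoch_t epoch = epoch_join();
 *     if ( acquire_ref( &obj->refcnt ) ) {
 *         ... safe to use obj ...
 *         try_release_ref( &obj->refcnt, obj, my_destroy );
 *     }
 *     epoch_leave( epoch );
 *
 * acquire_ref() returns the refcnt value it saw before incrementing, so a
 * zero return means the object is already being torn down and must not be
 * touched, while try_release_ref() schedules disposal through epoch_append()
 * when it drops the last reference.
 */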