author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2021-07-23 11:24:09 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2021-07-23 11:24:09 +0000
commit    | e36b37583bebd229102f46c4ed7d2f6fad8697d4 (patch)
tree      | 73937b6f051fcaaa1ccbdfbaa9f3a1f36bbedb9e /src/ck_epoch.c
parent    | Initial commit. (diff)
download  | ck-e36b37583bebd229102f46c4ed7d2f6fad8697d4.tar.xz
          | ck-e36b37583bebd229102f46c4ed7d2f6fad8697d4.zip
Adding upstream version 0.6.0. (upstream/0.6.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/ck_epoch.c')
-rw-r--r-- | src/ck_epoch.c | 545
1 file changed, 545 insertions, 0 deletions
diff --git a/src/ck_epoch.c b/src/ck_epoch.c
new file mode 100644
index 0000000..a0e9180
--- /dev/null
+++ b/src/ck_epoch.c
@@ -0,0 +1,545 @@
+/*
+ * Copyright 2011-2015 Samy Al Bahra.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The implementation here is inspired by the work described in:
+ *   Fraser, K. 2004. Practical Lock-Freedom. PhD Thesis, University
+ *   of Cambridge Computing Laboratory.
+ */
+
+#include <ck_backoff.h>
+#include <ck_cc.h>
+#include <ck_epoch.h>
+#include <ck_pr.h>
+#include <ck_stack.h>
+#include <ck_stdbool.h>
+#include <ck_string.h>
+
+/*
+ * Only three distinct values are used for reclamation, but reclamation occurs
+ * at e+2 rather than e+1. Any thread in a "critical section" would have
+ * acquired some snapshot (e) of the global epoch value (e_g) and set an active
+ * flag. Any hazardous references will only occur after a full memory barrier.
+ * For example, assume an initial e_g value of 1, an e value of 0 and an
+ * active value of 0.
+ *
+ * ck_epoch_begin(...)
+ *   e = e_g
+ *   active = 1
+ *   memory_barrier();
+ *
+ * Any serialized reads may observe e = 0 or e = 1 with active = 0, or e = 0 or
+ * e = 1 with active = 1. The e_g value can only go from 1 to 2 if every thread
+ * has already observed the value of "1" (or the value we are incrementing
+ * from). This guarantees that for any given value of e_g, any threads within
+ * critical sections (referred to as "active" threads from here on) would have
+ * an e value of e_g-1 or e_g. This also means that hazardous references may be
+ * shared in both e_g-1 and e_g even if they are logically deleted in e_g.
+ *
+ * For example, assume all threads have an e value of e_g. Another thread may
+ * increment e_g to e_g+1. Older threads may have a reference to an object
+ * which is only deleted in e_g+1. It could be that reader threads are
+ * executing some hash table look-ups, while some other writer thread (which
+ * causes the epoch counter tick) actually deletes the same items that reader
+ * threads are looking up (this writer thread having an e value of e_g+1).
+ * This is possible if the writer thread re-observes the epoch after the
+ * counter tick.
+ *
+ * Pseudo-code for writer:
+ *   ck_epoch_begin()
+ *   ht_delete(x)
+ *   ck_epoch_end()
+ *   ck_epoch_begin()
+ *   ht_delete(x)
+ *   ck_epoch_end()
+ *
+ * Pseudo-code for reader:
+ *   for (;;) {
+ *     x = ht_lookup(x)
+ *     ck_pr_inc(&x->value);
+ *   }
+ *
+ * Of course, it is also possible for references logically deleted at e_g-1 to
+ * still be accessed at e_g as threads are "active" at the same time
+ * (real-world time) mutating shared objects.
+ *
+ * Now, if the epoch counter is ticked to e_g+1, then no new hazardous
+ * references could exist to objects logically deleted at e_g-1. The reason for
+ * this is that at e_g+1, all epoch read-side critical sections started at
+ * e_g-1 must have been completed. If any epoch read-side critical sections at
+ * e_g-1 were still active, then we would never increment to e_g+1 (active != 0
+ * ^ e != e_g). Additionally, e_g may still have hazardous references to
+ * objects logically deleted at e_g-1 which means objects logically deleted at
+ * e_g-1 cannot be deleted at e_g+1 unless all threads have observed e_g+1
+ * (since it is valid for active threads to be at e_g and threads at e_g still
+ * require safe memory accesses).
+ *
+ * However, at e_g+2, all active threads must be either at e_g+1 or e_g+2.
+ * Though e_g+2 may share hazardous references with e_g+1, and e_g+1 shares
+ * hazardous references with e_g, no active threads are at e_g or e_g-1. This
+ * means no hazardous references could exist to objects deleted at e_g-1 (at
+ * e_g+2).
+ *
+ * To summarize these important points,
+ *   1) Active threads will always have a value of e_g or e_g-1.
+ *   2) Items that are logically deleted at e_g or e_g-1 cannot be physically
+ *      deleted.
+ *   3) Objects logically deleted at e_g-1 can be physically destroyed at e_g+2
+ *      or at e_g+1 if no threads are at e_g.
+ *
+ * Last but not least, if we are at e_g+2, then no active thread is at e_g
+ * which means it is safe to apply modulo-3 arithmetic to the e_g value in
+ * order to re-use e_g to represent the e_g+3 state. This means it is
+ * sufficient to represent e_g using only the values 0, 1 or 2. Every time a
+ * thread re-visits an e_g (which can be determined with a non-empty deferral
+ * list) it can assume objects in the e_g deferral list involved at least
+ * three e_g transitions and are thus safe for physical deletion.
+ *
+ * Blocking semantics for epoch reclamation have additional restrictions.
+ * Though we only require three deferral lists, reasonable blocking semantics
+ * must be able to more gracefully handle bursty write workloads which could
+ * easily cause e_g wrap-around if modulo-3 arithmetic is used. This allows for
+ * easy-to-trigger live-lock situations. The work-around to this is to not
+ * apply modulo arithmetic to e_g but only to deferral list indexing.
+ */
+#define CK_EPOCH_GRACE 3U
+
+enum {
+	CK_EPOCH_STATE_USED = 0,
+	CK_EPOCH_STATE_FREE = 1
+};
+
+CK_STACK_CONTAINER(struct ck_epoch_record, record_next,
+    ck_epoch_record_container)
+CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
+    ck_epoch_entry_container)
+
+#define CK_EPOCH_SENSE_MASK	(CK_EPOCH_SENSE - 1)
+
+void
+_ck_epoch_delref(struct ck_epoch_record *record,
+    struct ck_epoch_section *section)
+{
+	struct ck_epoch_ref *current, *other;
+	unsigned int i = section->bucket;
+
+	current = &record->local.bucket[i];
+	current->count--;
+
+	if (current->count > 0)
+		return;
+
+	/*
+	 * If the current bucket no longer has any references, then
+	 * determine whether we have already transitioned into a newer
+	 * epoch. If so, then make sure to update our shared snapshot
+	 * to allow for forward progress.
+	 *
+	 * If no other active bucket exists, then the record will go
+	 * inactive in order to allow for forward progress.
+	 */
+	other = &record->local.bucket[(i + 1) &
+	    CK_EPOCH_SENSE_MASK];
+	if (other->count > 0 &&
+	    ((int)(current->epoch - other->epoch) < 0)) {
+		/*
+		 * The other epoch value is actually the newest,
+		 * transition to it.
+		 */
+		ck_pr_store_uint(&record->epoch, other->epoch);
+	}
+
+	return;
+}
+
+void
+_ck_epoch_addref(struct ck_epoch_record *record,
+    struct ck_epoch_section *section)
+{
+	struct ck_epoch *global = record->global;
+	struct ck_epoch_ref *ref;
+	unsigned int epoch, i;
+
+	epoch = ck_pr_load_uint(&global->epoch);
+	i = epoch & CK_EPOCH_SENSE_MASK;
+	ref = &record->local.bucket[i];
+
+	if (ref->count++ == 0) {
+#ifndef CK_MD_TSO
+		struct ck_epoch_ref *previous;
+
+		/*
+		 * The system has already ticked. If another non-zero bucket
+		 * exists, make sure to order our observations with respect
+		 * to it. Otherwise, it is possible to acquire a reference
+		 * from the previous epoch generation.
+		 *
+		 * On TSO architectures, the monotonicity of the global counter
+		 * and load-{store, load} ordering are sufficient to guarantee
+		 * this ordering.
+		 */
+		previous = &record->local.bucket[(i + 1) &
+		    CK_EPOCH_SENSE_MASK];
+		if (previous->count > 0)
+			ck_pr_fence_acqrel();
+#endif /* !CK_MD_TSO */
+
+		/*
+		 * If this is a new reference into the current
+		 * bucket then cache the associated epoch value.
+		 */
+		ref->epoch = epoch;
+	}
+
+	section->bucket = i;
+	return;
+}
+
+void
+ck_epoch_init(struct ck_epoch *global)
+{
+
+	ck_stack_init(&global->records);
+	global->epoch = 1;
+	global->n_free = 0;
+	ck_pr_fence_store();
+	return;
+}
+
+struct ck_epoch_record *
+ck_epoch_recycle(struct ck_epoch *global)
+{
+	struct ck_epoch_record *record;
+	ck_stack_entry_t *cursor;
+	unsigned int state;
+
+	if (ck_pr_load_uint(&global->n_free) == 0)
+		return NULL;
+
+	CK_STACK_FOREACH(&global->records, cursor) {
+		record = ck_epoch_record_container(cursor);
+
+		if (ck_pr_load_uint(&record->state) == CK_EPOCH_STATE_FREE) {
+			/* Serialize with respect to deferral list clean-up. */
+			ck_pr_fence_load();
+			state = ck_pr_fas_uint(&record->state,
+			    CK_EPOCH_STATE_USED);
+			if (state == CK_EPOCH_STATE_FREE) {
+				ck_pr_dec_uint(&global->n_free);
+				return record;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+void
+ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record)
+{
+	size_t i;
+
+	record->global = global;
+	record->state = CK_EPOCH_STATE_USED;
+	record->active = 0;
+	record->epoch = 0;
+	record->n_dispatch = 0;
+	record->n_peak = 0;
+	record->n_pending = 0;
+	memset(&record->local, 0, sizeof record->local);
+
+	for (i = 0; i < CK_EPOCH_LENGTH; i++)
+		ck_stack_init(&record->pending[i]);
+
+	ck_pr_fence_store();
+	ck_stack_push_upmc(&global->records, &record->record_next);
+	return;
+}
+
+void
+ck_epoch_unregister(struct ck_epoch_record *record)
+{
+	struct ck_epoch *global = record->global;
+	size_t i;
+
+	record->active = 0;
+	record->epoch = 0;
+	record->n_dispatch = 0;
+	record->n_peak = 0;
+	record->n_pending = 0;
+	memset(&record->local, 0, sizeof record->local);
+
+	for (i = 0; i < CK_EPOCH_LENGTH; i++)
+		ck_stack_init(&record->pending[i]);
+
+	ck_pr_fence_store();
+	ck_pr_store_uint(&record->state, CK_EPOCH_STATE_FREE);
+	ck_pr_inc_uint(&global->n_free);
+	return;
+}
+
+static struct ck_epoch_record *
+ck_epoch_scan(struct ck_epoch *global,
+    struct ck_epoch_record *cr,
+    unsigned int epoch,
+    bool *af)
+{
+	ck_stack_entry_t *cursor;
+
+	if (cr == NULL) {
+		cursor = CK_STACK_FIRST(&global->records);
+		*af = false;
+	} else {
+		cursor = &cr->record_next;
+		*af = true;
+	}
+
+	while (cursor != NULL) {
+		unsigned int state, active;
+
+		cr = ck_epoch_record_container(cursor);
+
+		state = ck_pr_load_uint(&cr->state);
+		if (state & CK_EPOCH_STATE_FREE) {
+			cursor = CK_STACK_NEXT(cursor);
+			continue;
+		}
+
+		active = ck_pr_load_uint(&cr->active);
+		*af |= active;
+
+		if (active != 0 && ck_pr_load_uint(&cr->epoch) != epoch)
+			return cr;
+
+		cursor = CK_STACK_NEXT(cursor);
+	}
+
+	return NULL;
+}
+
+static void
+ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e)
+{
+	unsigned int epoch = e & (CK_EPOCH_LENGTH - 1);
+	ck_stack_entry_t *head, *next, *cursor;
+	unsigned int i = 0;
+
+	head = CK_STACK_FIRST(&record->pending[epoch]);
+	ck_stack_init(&record->pending[epoch]);
+
+	for (cursor = head; cursor != NULL; cursor = next) {
+		struct ck_epoch_entry *entry =
+		    ck_epoch_entry_container(cursor);
+
+		next = CK_STACK_NEXT(cursor);
+		entry->function(entry);
+		i++;
+	}
+
+	if (record->n_pending > record->n_peak)
+		record->n_peak = record->n_pending;
+
+	record->n_dispatch += i;
+	record->n_pending -= i;
+	return;
+}
+
+/*
+ * Reclaim all objects associated with a record.
+ */
+void
+ck_epoch_reclaim(struct ck_epoch_record *record)
+{
+	unsigned int epoch;
+
+	for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
+		ck_epoch_dispatch(record, epoch);
+
+	return;
+}
+
+/*
+ * This function must not be called within a read section.
+ */
+void
+ck_epoch_synchronize(struct ck_epoch_record *record)
+{
+	struct ck_epoch *global = record->global;
+	struct ck_epoch_record *cr;
+	unsigned int delta, epoch, goal, i;
+	bool active;
+
+	ck_pr_fence_memory();
+
+	/*
+	 * The observation of the global epoch must be ordered with respect to
+	 * all prior operations. The re-ordering of loads is permitted given
+	 * the monotonicity of the global epoch counter.
+	 *
+	 * If UINT_MAX concurrent mutations were to occur then it is possible
+	 * to encounter an ABA issue. If this is a concern, consider tuning
+	 * write-side concurrency.
+	 */
+	delta = epoch = ck_pr_load_uint(&global->epoch);
+	goal = epoch + CK_EPOCH_GRACE;
+
+	for (i = 0, cr = NULL; i < CK_EPOCH_GRACE - 1; cr = NULL, i++) {
+		bool r;
+
+		/*
+		 * Determine whether all threads have observed the current
+		 * epoch with respect to the updates on invocation.
+		 */
+		while (cr = ck_epoch_scan(global, cr, delta, &active),
+		    cr != NULL) {
+			unsigned int e_d;
+
+			ck_pr_stall();
+
+			/*
+			 * Another writer may have already observed a grace
+			 * period.
+			 */
+			e_d = ck_pr_load_uint(&global->epoch);
+			if (e_d != delta) {
+				delta = e_d;
+				goto reload;
+			}
+		}
+
+		/*
+		 * If we have observed all threads as inactive, then we assume
+		 * we are at a grace period.
+		 */
+		if (active == false)
+			break;
+
+		/*
+		 * Increment current epoch. CAS semantics are used to eliminate
+		 * increment operations for synchronization that occurs for the
+		 * same global epoch value snapshot.
+		 *
+		 * If we can guarantee there will only be one active barrier or
+		 * epoch tick at a given time, then it is sufficient to use an
+		 * increment operation. In a multi-barrier workload, however,
+		 * it is possible to overflow the epoch value if we apply
+		 * modulo-3 arithmetic.
+		 */
+		r = ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
+		    &delta);
+
+		/* Order subsequent thread active checks. */
+		ck_pr_fence_atomic_load();
+
+		/*
+		 * If the CAS has succeeded, then set delta to the latest
+		 * snapshot. Otherwise, we have just acquired the latest
+		 * snapshot.
+		 */
+		delta = delta + r;
+		continue;
+
+reload:
+		if ((goal > epoch) & (delta >= goal)) {
+			/*
+			 * Right now, epoch overflow is handled as an edge
+			 * case. If we have already observed an epoch
+			 * generation, then we can be sure no hazardous
+			 * references exist to objects from this generation. We
+			 * can actually avoid an additional scan step at this
+			 * point.
+			 */
+			break;
+		}
+	}
+
+	/*
+	 * A majority of use-cases will not require full barrier semantics.
+	 * However, if non-temporal instructions are used, full barrier
+	 * semantics are necessary.
+	 */
+	ck_pr_fence_memory();
+	record->epoch = delta;
+	return;
+}
+
+void
+ck_epoch_barrier(struct ck_epoch_record *record)
+{
+
+	ck_epoch_synchronize(record);
+	ck_epoch_reclaim(record);
+	return;
+}
+
+/*
+ * It may be worth it to actually apply these deferral semantics to an epoch
+ * that was observed at ck_epoch_call time. The problem is that the latter
+ * would require a full fence.
+ *
+ * ck_epoch_call will dispatch to the latest epoch snapshot that was observed.
+ * There are cases where it will fail to reclaim as early as it could. If this
+ * becomes a problem, we could actually use a heap for epoch buckets but that
+ * is far from ideal too.
+ */
+bool
+ck_epoch_poll(struct ck_epoch_record *record)
+{
+	bool active;
+	unsigned int epoch;
+	unsigned int snapshot;
+	struct ck_epoch_record *cr = NULL;
+	struct ck_epoch *global = record->global;
+
+	epoch = ck_pr_load_uint(&global->epoch);
+
+	/* Serialize epoch snapshots with respect to global epoch. */
+	ck_pr_fence_memory();
+	cr = ck_epoch_scan(global, cr, epoch, &active);
+	if (cr != NULL) {
+		record->epoch = epoch;
+		return false;
+	}
+
+	/* We are at a grace period if all threads are inactive. */
+	if (active == false) {
+		record->epoch = epoch;
+		for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
+			ck_epoch_dispatch(record, epoch);
+
+		return true;
+	}
+
+	/* If an active thread exists, rely on epoch observation. */
+	if (ck_pr_cas_uint_value(&global->epoch, epoch, epoch + 1,
+	    &snapshot) == false) {
+		record->epoch = snapshot;
+	} else {
+		record->epoch = epoch + 1;
+	}
+
+	ck_epoch_dispatch(record, epoch + 1);
+	return true;
+}
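
For context, a minimal sketch (not part of the diff above) of how the functions introduced by this file are typically wired together. It assumes the read-side entry points ck_epoch_begin(), ck_epoch_end() and the deferral helper ck_epoch_call() declared in ck_epoch.h of this same upstream version, with a NULL section argument for the simple non-nested case; the node, destroy_node, reader and writer names are purely illustrative.

```c
#include <ck_epoch.h>
#include <stdlib.h>

/*
 * An object whose reclamation is deferred. Keeping the epoch entry as the
 * first member lets the callback recover the node with a simple cast.
 */
struct node {
	ck_epoch_entry_t epoch_entry;
	int value;
};

static ck_epoch_t global_epoch;

/* Invoked once at least one grace period has elapsed since the deferral. */
static void
destroy_node(ck_epoch_entry_t *entry)
{

	free((struct node *)entry);
}

static void
reader(ck_epoch_record_t *record, struct node *n)
{

	ck_epoch_begin(record, NULL);	/* pin the current epoch */
	(void)n->value;			/* ... dereference shared objects ... */
	ck_epoch_end(record, NULL);	/* unpin; lets the epoch advance */
}

static void
writer(ck_epoch_record_t *record, struct node *n)
{

	/* Logically unlink n from the shared structure first, then defer. */
	ck_epoch_call(record, &n->epoch_entry, destroy_node);

	/*
	 * ck_epoch_poll() never blocks: it opportunistically ticks the epoch
	 * and dispatches whatever is already safe. ck_epoch_barrier() would
	 * instead wait for a full grace period and then reclaim.
	 */
	(void)ck_epoch_poll(record);
}

int
main(void)
{
	ck_epoch_record_t record;
	struct node *n;

	ck_epoch_init(&global_epoch);
	ck_epoch_register(&global_epoch, &record);

	n = malloc(sizeof *n);
	if (n == NULL)
		return 1;
	n->value = 42;

	reader(&record, n);
	writer(&record, n);

	ck_epoch_barrier(&record);	/* flush any remaining deferrals */
	ck_epoch_unregister(&record);
	return 0;
}
```

In a multi-threaded program each thread would own its own ck_epoch_record_t, obtained via ck_epoch_recycle() or ck_epoch_register(), so that the read-side path stays free of shared writes other than the per-record active flag.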