author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-07 18:45:59 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-07 18:45:59 +0000
commit     19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree       42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/spdk/dpdk/lib/librte_rcu
parent     Initial commit. (diff)
Adding upstream version 16.2.11+ds.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/dpdk/lib/librte_rcu')
-rw-r--r--  src/spdk/dpdk/lib/librte_rcu/Makefile               20
-rw-r--r--  src/spdk/dpdk/lib/librte_rcu/meson.build             9
-rw-r--r--  src/spdk/dpdk/lib/librte_rcu/rcu_qsbr_pvt.h         66
-rw-r--r--  src/spdk/dpdk/lib/librte_rcu/rte_rcu_qsbr.c        505
-rw-r--r--  src/spdk/dpdk/lib/librte_rcu/rte_rcu_qsbr.h        898
-rw-r--r--  src/spdk/dpdk/lib/librte_rcu/rte_rcu_version.map    17
6 files changed, 1515 insertions, 0 deletions
diff --git a/src/spdk/dpdk/lib/librte_rcu/Makefile b/src/spdk/dpdk/lib/librte_rcu/Makefile
new file mode 100644
index 000000000..553bca2ef
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_rcu/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Arm Limited
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_rcu.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+LDLIBS += -lrte_eal -lrte_ring
+
+EXPORT_MAP := rte_rcu_version.map
+
+# all sources are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_RCU) := rte_rcu_qsbr.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_RCU)-include := rte_rcu_qsbr.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/src/spdk/dpdk/lib/librte_rcu/meson.build b/src/spdk/dpdk/lib/librte_rcu/meson.build
new file mode 100644
index 000000000..62fa9d9a7
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_rcu/meson.build
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Arm Limited
+
+sources = files('rte_rcu_qsbr.c')
+headers = files('rte_rcu_qsbr.h')
+
+deps += ['ring']
+build = false
+reason = 'not needed by SPDK'
diff --git a/src/spdk/dpdk/lib/librte_rcu/rcu_qsbr_pvt.h b/src/spdk/dpdk/lib/librte_rcu/rcu_qsbr_pvt.h
new file mode 100644
index 000000000..196b9abd6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_rcu/rcu_qsbr_pvt.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Arm Limited
+ */
+
+#ifndef _RTE_RCU_QSBR_PVT_H_
+#define _RTE_RCU_QSBR_PVT_H_
+
+/**
+ * This file is private to the RCU library. It should not be included
+ * by the user of this library.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_ring.h>
+#include <rte_ring_elem.h>
+
+#include "rte_rcu_qsbr.h"
+
+/* Defer queue structure.
+ * This structure holds the defer queue. The defer queue is used to
+ * hold the deleted entries from the data structure that are not
+ * yet freed.
+ */
+struct rte_rcu_qsbr_dq {
+ struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/
+ struct rte_ring *r; /**< RCU QSBR defer queue. */
+ uint32_t size;
+ /**< Number of elements in the defer queue */
+ uint32_t esize;
+ /**< Size (in bytes) of data, including the token, stored on the
+ * defer queue.
+ */
+ uint32_t trigger_reclaim_limit;
+ /**< Trigger automatic reclamation after the defer queue
+ * holds at least this many resources waiting.
+ */
+ uint32_t max_reclaim_size;
+ /**< Reclaim at most this many resources during automatic
+ * reclamation.
+ */
+ rte_rcu_qsbr_free_resource_t free_fn;
+ /**< Function to call to free the resource. */
+ void *p;
+ /**< Pointer passed to the free function. Typically, this is the
+ * pointer to the data structure to which the resource to free
+ * belongs.
+ */
+};
+
+/* Internal structure to represent the element on the defer queue.
+ * The may_alias attribute is used because a character array is
+ * cast to a variable of this structure type.
+ */
+typedef struct {
+ uint64_t token; /**< Token */
+ uint8_t elem[0]; /**< Start of the user element */
+} __attribute__((__may_alias__)) __rte_rcu_qsbr_dq_elem_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RCU_QSBR_PVT_H_ */
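The __may_alias__ typedef above fixes the layout of each defer-queue slot: a 64-bit grace-period token followed immediately by the user's element bytes. As a minimal sketch of how rte_rcu_qsbr.c stages such a slot before enqueuing it on the ring (the helper name is illustrative and the header is private to the library, so this is for explanation only, not part of the patch):

#include <string.h>
#include <stdint.h>

#include "rte_rcu_qsbr.h"
#include "rcu_qsbr_pvt.h"

/* Illustrative helper (not part of the patch): stage one defer-queue slot the
 * way rte_rcu_qsbr_dq_enqueue() does. 'buf' must hold at least
 * __RTE_QSBR_TOKEN_SIZE + esize bytes; 'esize' is the user element size.
 */
static void
stage_dq_slot(struct rte_rcu_qsbr *v, void *buf, const void *elem,
		uint32_t esize)
{
	__rte_rcu_qsbr_dq_elem_t *dq_elem = (__rte_rcu_qsbr_dq_elem_t *)buf;

	dq_elem->token = rte_rcu_qsbr_start(v);	/* start a grace period */
	memcpy(dq_elem->elem, elem, esize);	/* user bytes follow the token */
}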
diff --git a/src/spdk/dpdk/lib/librte_rcu/rte_rcu_qsbr.c b/src/spdk/dpdk/lib/librte_rcu/rte_rcu_qsbr.c
new file mode 100644
index 000000000..6a429d8b3
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_rcu/rte_rcu_qsbr.c
@@ -0,0 +1,505 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2018-2020 Arm Limited
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_atomic.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_errno.h>
+#include <rte_ring_elem.h>
+
+#include "rte_rcu_qsbr.h"
+#include "rcu_qsbr_pvt.h"
+
+/* Get the memory size of QSBR variable */
+size_t
+rte_rcu_qsbr_get_memsize(uint32_t max_threads)
+{
+ size_t sz;
+
+ if (max_threads == 0) {
+ rte_log(RTE_LOG_ERR, rte_rcu_log_type,
+ "%s(): Invalid max_threads %u\n",
+ __func__, max_threads);
+ rte_errno = EINVAL;
+
+ return 1;
+ }
+
+ sz = sizeof(struct rte_rcu_qsbr);
+
+ /* Add the size of quiescent state counter array */
+ sz += sizeof(struct rte_rcu_qsbr_cnt) * max_threads;
+
+ /* Add the size of the registered thread ID bitmap array */
+ sz += __RTE_QSBR_THRID_ARRAY_SIZE(max_threads);
+
+ return sz;
+}
+
+/* Initialize a quiescent state variable */
+int
+rte_rcu_qsbr_init(struct rte_rcu_qsbr *v, uint32_t max_threads)
+{
+ size_t sz;
+
+ if (v == NULL) {
+ rte_log(RTE_LOG_ERR, rte_rcu_log_type,
+ "%s(): Invalid input parameter\n", __func__);
+ rte_errno = EINVAL;
+
+ return 1;
+ }
+
+ sz = rte_rcu_qsbr_get_memsize(max_threads);
+ if (sz == 1)
+ return 1;
+
+ /* Set all the threads to offline */
+ memset(v, 0, sz);
+ v->max_threads = max_threads;
+ v->num_elems = RTE_ALIGN_MUL_CEIL(max_threads,
+ __RTE_QSBR_THRID_ARRAY_ELM_SIZE) /
+ __RTE_QSBR_THRID_ARRAY_ELM_SIZE;
+ v->token = __RTE_QSBR_CNT_INIT;
+ v->acked_token = __RTE_QSBR_CNT_INIT - 1;
+
+ return 0;
+}
+
+/* Register a reader thread to report its quiescent state
+ * on a QS variable.
+ */
+int
+rte_rcu_qsbr_thread_register(struct rte_rcu_qsbr *v, unsigned int thread_id)
+{
+ unsigned int i, id, success;
+ uint64_t old_bmap, new_bmap;
+
+ if (v == NULL || thread_id >= v->max_threads) {
+ rte_log(RTE_LOG_ERR, rte_rcu_log_type,
+ "%s(): Invalid input parameter\n", __func__);
+ rte_errno = EINVAL;
+
+ return 1;
+ }
+
+ __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, ERR, "Lock counter %u\n",
+ v->qsbr_cnt[thread_id].lock_cnt);
+
+ id = thread_id & __RTE_QSBR_THRID_MASK;
+ i = thread_id >> __RTE_QSBR_THRID_INDEX_SHIFT;
+
+ /* Make sure that the counter for registered threads does not
+ * go out of sync. Hence, additional checks are required.
+ */
+ /* Check if the thread is already registered */
+ old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
+ __ATOMIC_RELAXED);
+ if (old_bmap & 1UL << id)
+ return 0;
+
+ do {
+ new_bmap = old_bmap | (1UL << id);
+ success = __atomic_compare_exchange(
+ __RTE_QSBR_THRID_ARRAY_ELM(v, i),
+ &old_bmap, &new_bmap, 0,
+ __ATOMIC_RELEASE, __ATOMIC_RELAXED);
+
+ if (success)
+ __atomic_fetch_add(&v->num_threads,
+ 1, __ATOMIC_RELAXED);
+ else if (old_bmap & (1UL << id))
+ /* Someone else registered this thread.
+ * Counter should not be incremented.
+ */
+ return 0;
+ } while (success == 0);
+
+ return 0;
+}
+
+/* Remove a reader thread, from the list of threads reporting their
+ * quiescent state on a QS variable.
+ */
+int
+rte_rcu_qsbr_thread_unregister(struct rte_rcu_qsbr *v, unsigned int thread_id)
+{
+ unsigned int i, id, success;
+ uint64_t old_bmap, new_bmap;
+
+ if (v == NULL || thread_id >= v->max_threads) {
+ rte_log(RTE_LOG_ERR, rte_rcu_log_type,
+ "%s(): Invalid input parameter\n", __func__);
+ rte_errno = EINVAL;
+
+ return 1;
+ }
+
+ __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, ERR, "Lock counter %u\n",
+ v->qsbr_cnt[thread_id].lock_cnt);
+
+ id = thread_id & __RTE_QSBR_THRID_MASK;
+ i = thread_id >> __RTE_QSBR_THRID_INDEX_SHIFT;
+
+ /* Make sure that the counter for registered threads does not
+ * go out of sync. Hence, additional checks are required.
+ */
+ /* Check if the thread is already unregistered */
+ old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
+ __ATOMIC_RELAXED);
+ if (!(old_bmap & (1UL << id)))
+ return 0;
+
+ do {
+ new_bmap = old_bmap & ~(1UL << id);
+ /* Make sure any loads of the shared data structure are
+ * completed before removal of the thread from the list of
+ * reporting threads.
+ */
+ success = __atomic_compare_exchange(
+ __RTE_QSBR_THRID_ARRAY_ELM(v, i),
+ &old_bmap, &new_bmap, 0,
+ __ATOMIC_RELEASE, __ATOMIC_RELAXED);
+
+ if (success)
+ __atomic_fetch_sub(&v->num_threads,
+ 1, __ATOMIC_RELAXED);
+ else if (!(old_bmap & (1UL << id)))
+ /* Someone else unregistered this thread.
+ * Counter should not be decremented.
+ */
+ return 0;
+ } while (success == 0);
+
+ return 0;
+}
+
+/* Wait till the reader threads have entered quiescent state. */
+void
+rte_rcu_qsbr_synchronize(struct rte_rcu_qsbr *v, unsigned int thread_id)
+{
+ uint64_t t;
+
+ RTE_ASSERT(v != NULL);
+
+ t = rte_rcu_qsbr_start(v);
+
+ /* If the current thread has a read-side critical section,
+ * update its quiescent state status.
+ */
+ if (thread_id != RTE_QSBR_THRID_INVALID)
+ rte_rcu_qsbr_quiescent(v, thread_id);
+
+ /* Wait for other readers to enter quiescent state */
+ rte_rcu_qsbr_check(v, t, true);
+}
+
+/* Dump the details of a single quiescent state variable to a file. */
+int
+rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)
+{
+ uint64_t bmap;
+ uint32_t i, t, id;
+
+ if (v == NULL || f == NULL) {
+ rte_log(RTE_LOG_ERR, rte_rcu_log_type,
+ "%s(): Invalid input parameter\n", __func__);
+ rte_errno = EINVAL;
+
+ return 1;
+ }
+
+ fprintf(f, "\nQuiescent State Variable @%p\n", v);
+
+ fprintf(f, " QS variable memory size = %zu\n",
+ rte_rcu_qsbr_get_memsize(v->max_threads));
+ fprintf(f, " Given # max threads = %u\n", v->max_threads);
+ fprintf(f, " Current # threads = %u\n", v->num_threads);
+
+ fprintf(f, " Registered thread IDs = ");
+ for (i = 0; i < v->num_elems; i++) {
+ bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
+ __ATOMIC_ACQUIRE);
+ id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
+ while (bmap) {
+ t = __builtin_ctzl(bmap);
+ fprintf(f, "%u ", id + t);
+
+ bmap &= ~(1UL << t);
+ }
+ }
+
+ fprintf(f, "\n");
+
+ fprintf(f, " Token = %"PRIu64"\n",
+ __atomic_load_n(&v->token, __ATOMIC_ACQUIRE));
+
+ fprintf(f, " Least Acknowledged Token = %"PRIu64"\n",
+ __atomic_load_n(&v->acked_token, __ATOMIC_ACQUIRE));
+
+ fprintf(f, "Quiescent State Counts for readers:\n");
+ for (i = 0; i < v->num_elems; i++) {
+ bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
+ __ATOMIC_ACQUIRE);
+ id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
+ while (bmap) {
+ t = __builtin_ctzl(bmap);
+ fprintf(f, "thread ID = %u, count = %"PRIu64", lock count = %u\n",
+ id + t,
+ __atomic_load_n(
+ &v->qsbr_cnt[id + t].cnt,
+ __ATOMIC_RELAXED),
+ __atomic_load_n(
+ &v->qsbr_cnt[id + t].lock_cnt,
+ __ATOMIC_RELAXED));
+ bmap &= ~(1UL << t);
+ }
+ }
+
+ return 0;
+}
+
+/* Create a queue used to store the data structure elements that can
+ * be freed later. This queue is referred to as 'defer queue'.
+ */
+struct rte_rcu_qsbr_dq *
+rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params)
+{
+ struct rte_rcu_qsbr_dq *dq;
+ uint32_t qs_fifo_size;
+ unsigned int flags;
+
+ if (params == NULL || params->free_fn == NULL ||
+ params->v == NULL || params->name == NULL ||
+ params->size == 0 || params->esize == 0 ||
+ (params->esize % 4 != 0)) {
+ rte_log(RTE_LOG_ERR, rte_rcu_log_type,
+ "%s(): Invalid input parameter\n", __func__);
+ rte_errno = EINVAL;
+
+ return NULL;
+ }
+ /* If auto reclamation is configured, reclaim limit
+ * should be a valid value.
+ */
+ if ((params->trigger_reclaim_limit <= params->size) &&
+ (params->max_reclaim_size == 0)) {
+ rte_log(RTE_LOG_ERR, rte_rcu_log_type,
+ "%s(): Invalid input parameter, size = %u, trigger_reclaim_limit = %u, max_reclaim_size = %u\n",
+ __func__, params->size, params->trigger_reclaim_limit,
+ params->max_reclaim_size);
+ rte_errno = EINVAL;
+
+ return NULL;
+ }
+
+ dq = rte_zmalloc(NULL, sizeof(struct rte_rcu_qsbr_dq),
+ RTE_CACHE_LINE_SIZE);
+ if (dq == NULL) {
+ rte_errno = ENOMEM;
+
+ return NULL;
+ }
+
+ /* Decide the flags for the ring.
+ * If MT safety is requested, use RTS for ring enqueue as most
+ * use cases involve dq-enqueue happening on the control plane.
+ * Ring dequeue is always HTS due to the possibility of revert.
+ */
+ flags = RING_F_MP_RTS_ENQ;
+ if (params->flags & RTE_RCU_QSBR_DQ_MT_UNSAFE)
+ flags = RING_F_SP_ENQ;
+ flags |= RING_F_MC_HTS_DEQ;
+ /* Round qs_fifo_size up to the next power of two that is not less
+ * than params->size.
+ */
+ qs_fifo_size = rte_align32pow2(params->size + 1);
+ /* Add token size to ring element size */
+ dq->r = rte_ring_create_elem(params->name,
+ __RTE_QSBR_TOKEN_SIZE + params->esize,
+ qs_fifo_size, SOCKET_ID_ANY, flags);
+ if (dq->r == NULL) {
+ rte_log(RTE_LOG_ERR, rte_rcu_log_type,
+ "%s(): defer queue create failed\n", __func__);
+ rte_free(dq);
+ return NULL;
+ }
+
+ dq->v = params->v;
+ dq->size = params->size;
+ dq->esize = __RTE_QSBR_TOKEN_SIZE + params->esize;
+ dq->trigger_reclaim_limit = params->trigger_reclaim_limit;
+ dq->max_reclaim_size = params->max_reclaim_size;
+ dq->free_fn = params->free_fn;
+ dq->p = params->p;
+
+ return dq;
+}
+
+/* Enqueue one resource to the defer queue to free after the grace
+ * period is over.
+ */
+int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e)
+{
+ __rte_rcu_qsbr_dq_elem_t *dq_elem;
+ uint32_t cur_size;
+
+ if (dq == NULL || e == NULL) {
+ rte_log(RTE_LOG_ERR, rte_rcu_log_type,
+ "%s(): Invalid input parameter\n", __func__);
+ rte_errno = EINVAL;
+
+ return 1;
+ }
+
+ char data[dq->esize];
+ dq_elem = (__rte_rcu_qsbr_dq_elem_t *)data;
+ /* Start the grace period */
+ dq_elem->token = rte_rcu_qsbr_start(dq->v);
+
+ /* Reclaim resources if the queue size has hit the reclaim
+ * limit. This keeps the queue from growing too large and
+ * allows time for reader threads to report their quiescent state.
+ */
+ cur_size = rte_ring_count(dq->r);
+ if (cur_size > dq->trigger_reclaim_limit) {
+ rte_log(RTE_LOG_INFO, rte_rcu_log_type,
+ "%s(): Triggering reclamation\n", __func__);
+ rte_rcu_qsbr_dq_reclaim(dq, dq->max_reclaim_size,
+ NULL, NULL, NULL);
+ }
+
+ /* Enqueue the token and resource. Generating the token and
+ * enqueuing (token + resource) on the queue is not an
+ * atomic operation. When the defer queue is shared by multiple
+ * writers, this might result in tokens enqueued out of order
+ * on the queue. So, some resources might wait longer than
+ * necessary before they are reclaimed.
+ */
+ memcpy(dq_elem->elem, e, dq->esize - __RTE_QSBR_TOKEN_SIZE);
+ /* Check the status as enqueue might fail since the other threads
+ * might have used up the freed space.
+ * Enqueue uses the flags configured when the DQ was created.
+ */
+ if (rte_ring_enqueue_elem(dq->r, data, dq->esize) != 0) {
+ rte_log(RTE_LOG_ERR, rte_rcu_log_type,
+ "%s(): Enqueue failed\n", __func__);
+ /* Note that the token generated above is not used.
+ * Other than wasting tokens, it should not cause any
+ * other issues.
+ */
+ rte_log(RTE_LOG_INFO, rte_rcu_log_type,
+ "%s(): Skipped enqueuing token = %"PRIu64"\n",
+ __func__, dq_elem->token);
+
+ rte_errno = ENOSPC;
+ return 1;
+ }
+
+ rte_log(RTE_LOG_INFO, rte_rcu_log_type,
+ "%s(): Enqueued token = %"PRIu64"\n", __func__, dq_elem->token);
+
+ return 0;
+}
+
+/* Reclaim resources from the defer queue. */
+int
+rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq, unsigned int n,
+ unsigned int *freed, unsigned int *pending,
+ unsigned int *available)
+{
+ uint32_t cnt;
+ __rte_rcu_qsbr_dq_elem_t *dq_elem;
+
+ if (dq == NULL || n == 0) {
+ rte_log(RTE_LOG_ERR, rte_rcu_log_type,
+ "%s(): Invalid input parameter\n", __func__);
+ rte_errno = EINVAL;
+
+ return 1;
+ }
+
+ cnt = 0;
+
+ char data[dq->esize];
+ /* Check reader threads quiescent state and reclaim resources */
+ while (cnt < n &&
+ rte_ring_dequeue_bulk_elem_start(dq->r, &data,
+ dq->esize, 1, available) != 0) {
+ dq_elem = (__rte_rcu_qsbr_dq_elem_t *)data;
+
+ /* Reclaim the resource */
+ if (rte_rcu_qsbr_check(dq->v, dq_elem->token, false) != 1) {
+ rte_ring_dequeue_elem_finish(dq->r, 0);
+ break;
+ }
+ rte_ring_dequeue_elem_finish(dq->r, 1);
+
+ rte_log(RTE_LOG_INFO, rte_rcu_log_type,
+ "%s(): Reclaimed token = %"PRIu64"\n",
+ __func__, dq_elem->token);
+
+ dq->free_fn(dq->p, dq_elem->elem, 1);
+
+ cnt++;
+ }
+
+ rte_log(RTE_LOG_INFO, rte_rcu_log_type,
+ "%s(): Reclaimed %u resources\n", __func__, cnt);
+
+ if (freed != NULL)
+ *freed = cnt;
+ if (pending != NULL)
+ *pending = rte_ring_count(dq->r);
+
+ return 0;
+}
+
+/* Delete a defer queue. */
+int
+rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq)
+{
+ unsigned int pending;
+
+ if (dq == NULL) {
+ rte_log(RTE_LOG_DEBUG, rte_rcu_log_type,
+ "%s(): Invalid input parameter\n", __func__);
+
+ return 0;
+ }
+
+ /* Reclaim all the resources */
+ rte_rcu_qsbr_dq_reclaim(dq, ~0, NULL, &pending, NULL);
+ if (pending != 0) {
+ rte_errno = EAGAIN;
+
+ return 1;
+ }
+
+ rte_ring_free(dq->r);
+ rte_free(dq);
+
+ return 0;
+}
+
+int rte_rcu_log_type;
+
+RTE_INIT(rte_rcu_register)
+{
+ rte_rcu_log_type = rte_log_register("lib.rcu");
+ if (rte_rcu_log_type >= 0)
+ rte_log_set_level(rte_rcu_log_type, RTE_LOG_ERR);
+}
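Taken together, these functions implement the usual QSBR flow: a writer starts a grace period after unlinking an element and frees it only after every registered reader has reported a quiescent state. A minimal single-writer sketch under those assumptions (the setup helper, thread IDs and the freed object are placeholders, not part of this patch):

#include <rte_malloc.h>
#include <rte_rcu_qsbr.h>

/* Control plane: size, allocate and initialize the QS variable. */
static struct rte_rcu_qsbr *
qsbr_setup(uint32_t max_threads)
{
	size_t sz = rte_rcu_qsbr_get_memsize(max_threads);
	struct rte_rcu_qsbr *v = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE);

	if (v != NULL && rte_rcu_qsbr_init(v, max_threads) == 0)
		return v;
	rte_free(v);
	return NULL;
}

/* Each reader registers once (rte_rcu_qsbr_thread_register + _thread_online),
 * then reports a quiescent state once per loop iteration after dropping all
 * references to the shared structure.
 */
static void
reader_report_qs(struct rte_rcu_qsbr *v, unsigned int thread_id)
{
	rte_rcu_qsbr_quiescent(v, thread_id);
}

/* Writer: 'old' was just unlinked from the shared structure; free it only
 * after all registered readers have passed a quiescent state.
 */
static void
writer_free_after_grace_period(struct rte_rcu_qsbr *v, void *old)
{
	uint64_t token = rte_rcu_qsbr_start(v);

	rte_rcu_qsbr_check(v, token, true);	/* block until the grace period ends */
	rte_free(old);
}

A writer that does not want to block on rte_rcu_qsbr_check can instead hand the element to the defer-queue APIs declared in rte_rcu_qsbr.h.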
diff --git a/src/spdk/dpdk/lib/librte_rcu/rte_rcu_qsbr.h b/src/spdk/dpdk/lib/librte_rcu/rte_rcu_qsbr.h
new file mode 100644
index 000000000..fd4eb52b7
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_rcu/rte_rcu_qsbr.h
@@ -0,0 +1,898 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018-2020 Arm Limited
+ */
+
+#ifndef _RTE_RCU_QSBR_H_
+#define _RTE_RCU_QSBR_H_
+
+/**
+ * @file
+ * RTE Quiescent State Based Reclamation (QSBR)
+ *
+ * Quiescent State (QS) is any point in the thread execution
+ * where the thread does not hold a reference to a data structure
+ * in shared memory. While using lock-less data structures, the writer
+ * can safely free memory once all the reader threads have entered
+ * quiescent state.
+ *
+ * This library provides the ability for the readers to report quiescent
+ * state and for the writers to identify when all the readers have
+ * entered quiescent state.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_ring.h>
+
+extern int rte_rcu_log_type;
+
+#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
+#define __RTE_RCU_DP_LOG(level, fmt, args...) \
+ rte_log(RTE_LOG_ ## level, rte_rcu_log_type, \
+ "%s(): " fmt "\n", __func__, ## args)
+#else
+#define __RTE_RCU_DP_LOG(level, fmt, args...)
+#endif
+
+#if defined(RTE_LIBRTE_RCU_DEBUG)
+#define __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, level, fmt, args...) do {\
+ if (v->qsbr_cnt[thread_id].lock_cnt) \
+ rte_log(RTE_LOG_ ## level, rte_rcu_log_type, \
+ "%s(): " fmt "\n", __func__, ## args); \
+} while (0)
+#else
+#define __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, level, fmt, args...)
+#endif
+
+/* Registered thread IDs are stored as a bitmap in an array of 64b elements.
+ * A given thread ID needs to be converted to an index into the array and
+ * a bit position within that array element.
+ */
+#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE (sizeof(uint64_t) * 8)
+#define __RTE_QSBR_THRID_ARRAY_SIZE(max_threads) \
+ RTE_ALIGN(RTE_ALIGN_MUL_CEIL(max_threads, \
+ __RTE_QSBR_THRID_ARRAY_ELM_SIZE) >> 3, RTE_CACHE_LINE_SIZE)
+#define __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t *) \
+ ((struct rte_rcu_qsbr_cnt *)(v + 1) + v->max_threads) + i)
+#define __RTE_QSBR_THRID_INDEX_SHIFT 6
+#define __RTE_QSBR_THRID_MASK 0x3f
+#define RTE_QSBR_THRID_INVALID 0xffffffff
+
+/* Worker thread counter */
+struct rte_rcu_qsbr_cnt {
+ uint64_t cnt;
+ /**< Quiescent state counter. Value 0 indicates the thread is offline.
+ * A 64b counter is used to avoid adding more code to address
+ * counter overflow. Changing this to 32b would require additional
+ * changes to various APIs.
+ */
+ uint32_t lock_cnt;
+ /**< Lock counter. Used when CONFIG_RTE_LIBRTE_RCU_DEBUG is enabled */
+} __rte_cache_aligned;
+
+#define __RTE_QSBR_CNT_THR_OFFLINE 0
+#define __RTE_QSBR_CNT_INIT 1
+#define __RTE_QSBR_CNT_MAX ((uint64_t)~0)
+#define __RTE_QSBR_TOKEN_SIZE sizeof(uint64_t)
+
+/* RTE Quiescent State variable structure.
+ * This structure has two elements that vary in size based on the
+ * 'max_threads' parameter.
+ * 1) Quiescent state counter array
+ * 2) Register thread ID array
+ */
+struct rte_rcu_qsbr {
+ uint64_t token __rte_cache_aligned;
+ /**< Counter to allow for multiple concurrent quiescent state queries */
+ uint64_t acked_token;
+ /**< Least token acked by all the threads in the last call to
+ * rte_rcu_qsbr_check API.
+ */
+
+ uint32_t num_elems __rte_cache_aligned;
+ /**< Number of elements in the thread ID array */
+ uint32_t num_threads;
+ /**< Number of threads currently using this QS variable */
+ uint32_t max_threads;
+ /**< Maximum number of threads using this QS variable */
+
+ struct rte_rcu_qsbr_cnt qsbr_cnt[0] __rte_cache_aligned;
+ /**< Quiescent state counter array of 'max_threads' elements */
+
+ /**< Registered thread IDs are stored in a bitmap array,
+ * after the quiescent state counter array.
+ */
+} __rte_cache_aligned;
+
+/**
+ * Call back function called to free the resources.
+ *
+ * @param p
+ * Pointer provided while creating the defer queue
+ * @param e
+ * Pointer to the resource data stored on the defer queue
+ * @param n
+ * Number of resources to free. Currently, this is set to 1.
+ *
+ * @return
+ * None
+ */
+typedef void (*rte_rcu_qsbr_free_resource_t)(void *p, void *e, unsigned int n);
+
+#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE
+
+/**
+ * Various flags supported.
+ */
+/**< Enqueue and reclaim operations are multi-thread safe by default.
+ * The call back functions registered to free the resources are
+ * assumed to be multi-thread safe.
+ * Set this flag if multi-thread safety is not required.
+ */
+#define RTE_RCU_QSBR_DQ_MT_UNSAFE 1
+
+/**
+ * Parameters used when creating the defer queue.
+ */
+struct rte_rcu_qsbr_dq_parameters {
+ const char *name;
+ /**< Name of the queue. */
+ uint32_t flags;
+ /**< Flags to control API behaviors */
+ uint32_t size;
+ /**< Number of entries in queue. Typically, this will be
+ * the same as the maximum number of entries supported in the
+ * lock free data structure.
+ * Data structures with an unbounded number of entries are not
+ * supported currently.
+ */
+ uint32_t esize;
+ /**< Size (in bytes) of each element in the defer queue.
+ * This has to be a multiple of 4B.
+ */
+ uint32_t trigger_reclaim_limit;
+ /**< Trigger automatic reclamation after the defer queue
+ * holds at least this many resources waiting. This auto
+ * reclamation is triggered in rte_rcu_qsbr_dq_enqueue API
+ * call.
+ * If this is greater than 'size', auto reclamation is
+ * not triggered.
+ * If this is set to 0, auto reclamation is triggered
+ * in every call to rte_rcu_qsbr_dq_enqueue API.
+ */
+ uint32_t max_reclaim_size;
+ /**< When automatic reclamation is enabled, reclaim at most
+ * this many resources. This should contain a valid value, if
+ * auto reclamation is on. Setting this to 'size' or greater will
+ * reclaim all possible resources currently on the defer queue.
+ */
+ rte_rcu_qsbr_free_resource_t free_fn;
+ /**< Function to call to free the resource. */
+ void *p;
+ /**< Pointer passed to the free function. Typically, this is the
+ * pointer to the data structure to which the resource to free
+ * belongs. This can be NULL.
+ */
+ struct rte_rcu_qsbr *v;
+ /**< RCU QSBR variable to use for this defer queue */
+};
+
+/* RTE defer queue structure.
+ * This structure holds the defer queue. The defer queue is used to
+ * hold the deleted entries from the data structure that are not
+ * yet freed.
+ */
+struct rte_rcu_qsbr_dq;
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Return the size of the memory occupied by a Quiescent State variable.
+ *
+ * @param max_threads
+ * Maximum number of threads reporting quiescent state on this variable.
+ * @return
+ * On success - size of memory in bytes required for this QS variable.
+ * On error - 1 with error code set in rte_errno.
+ * Possible rte_errno codes are:
+ * - EINVAL - max_threads is 0
+ */
+__rte_experimental
+size_t
+rte_rcu_qsbr_get_memsize(uint32_t max_threads);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Initialize a Quiescent State (QS) variable.
+ *
+ * @param v
+ * QS variable
+ * @param max_threads
+ * Maximum number of threads reporting quiescent state on this variable.
+ * This should be the same value as passed to rte_rcu_qsbr_get_memsize.
+ * @return
+ * On success - 0
+ * On error - 1 with error code set in rte_errno.
+ * Possible rte_errno codes are:
+ * - EINVAL - max_threads is 0 or 'v' is NULL.
+ *
+ */
+__rte_experimental
+int
+rte_rcu_qsbr_init(struct rte_rcu_qsbr *v, uint32_t max_threads);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a reader thread to report its quiescent state
+ * on a QS variable.
+ *
+ * This is implemented as a lock-free function. It is multi-thread
+ * safe.
+ * Any reader thread that wants to report its quiescent state must
+ * call this API. This can be called during initialization or as part
+ * of the packet processing loop.
+ *
+ * Note that rte_rcu_qsbr_thread_online must be called before the
+ * thread updates its quiescent state using rte_rcu_qsbr_quiescent.
+ *
+ * @param v
+ * QS variable
+ * @param thread_id
+ * Reader thread with this thread ID will report its quiescent state on
+ * the QS variable. thread_id is a value between 0 and (max_threads - 1).
+ * 'max_threads' is the parameter passed in 'rte_rcu_qsbr_init' API.
+ */
+__rte_experimental
+int
+rte_rcu_qsbr_thread_register(struct rte_rcu_qsbr *v, unsigned int thread_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Remove a reader thread, from the list of threads reporting their
+ * quiescent state on a QS variable.
+ *
+ * This is implemented as a lock-free function. It is multi-thread safe.
+ * This API can be called from the reader threads during shutdown.
+ * Ongoing quiescent state queries will stop waiting for the status from this
+ * unregistered reader thread.
+ *
+ * @param v
+ * QS variable
+ * @param thread_id
+ * Reader thread with this thread ID will stop reporting its quiescent
+ * state on the QS variable.
+ */
+__rte_experimental
+int
+rte_rcu_qsbr_thread_unregister(struct rte_rcu_qsbr *v, unsigned int thread_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Add a registered reader thread, to the list of threads reporting their
+ * quiescent state on a QS variable.
+ *
+ * This is implemented as a lock-free function. It is multi-thread
+ * safe.
+ *
+ * Any registered reader thread that wants to report its quiescent state must
+ * call this API before calling rte_rcu_qsbr_quiescent. This can be called
+ * during initialization or as part of the packet processing loop.
+ *
+ * The reader thread must call rte_rcu_qsbr_thread_offline API, before
+ * calling any functions that block, to ensure that rte_rcu_qsbr_check
+ * API does not wait indefinitely for the reader thread to update its QS.
+ *
+ * The reader thread must call rte_rcu_qsbr_thread_online API, after the
+ * blocking function call returns, to ensure that rte_rcu_qsbr_check API
+ * waits for the reader thread to update its quiescent state.
+ *
+ * @param v
+ * QS variable
+ * @param thread_id
+ * Reader thread with this thread ID will report its quiescent state on
+ * the QS variable.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_rcu_qsbr_thread_online(struct rte_rcu_qsbr *v, unsigned int thread_id)
+{
+ uint64_t t;
+
+ RTE_ASSERT(v != NULL && thread_id < v->max_threads);
+
+ __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, ERR, "Lock counter %u\n",
+ v->qsbr_cnt[thread_id].lock_cnt);
+
+ /* Copy the current value of token.
+ * The fence at the end of the function will ensure that
+ * the following will not move down after the load of any shared
+ * data structure.
+ */
+ t = __atomic_load_n(&v->token, __ATOMIC_RELAXED);
+
+ /* __atomic_store_n(cnt, __ATOMIC_RELAXED) is used to ensure
+ * 'cnt' (64b) is accessed atomically.
+ */
+ __atomic_store_n(&v->qsbr_cnt[thread_id].cnt,
+ t, __ATOMIC_RELAXED);
+
+ /* The subsequent load of the data structure should not
+ * move above the store. Hence a store-load barrier
+ * is required.
+ * If the load of the data structure moves above the store,
+ * writer might not see that the reader is online, even though
+ * the reader is referencing the shared data structure.
+ */
+#ifdef RTE_ARCH_X86_64
+ /* rte_smp_mb() for x86 is lighter */
+ rte_smp_mb();
+#else
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
+#endif
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Remove a registered reader thread from the list of threads reporting their
+ * quiescent state on a QS variable.
+ *
+ * This is implemented as a lock-free function. It is multi-thread
+ * safe.
+ *
+ * This can be called during initialization or as part of the packet
+ * processing loop.
+ *
+ * The reader thread must call rte_rcu_qsbr_thread_offline API, before
+ * calling any functions that block, to ensure that rte_rcu_qsbr_check
+ * API does not wait indefinitely for the reader thread to update its QS.
+ *
+ * @param v
+ * QS variable
+ * @param thread_id
+ * rte_rcu_qsbr_check API will not wait for the reader thread with
+ * this thread ID to report its quiescent state on the QS variable.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_rcu_qsbr_thread_offline(struct rte_rcu_qsbr *v, unsigned int thread_id)
+{
+ RTE_ASSERT(v != NULL && thread_id < v->max_threads);
+
+ __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, ERR, "Lock counter %u\n",
+ v->qsbr_cnt[thread_id].lock_cnt);
+
+ /* The reader can go offline only after the load of the
+ * data structure is completed. i.e. any load of the
+ * data structure cannot move after this store.
+ */
+
+ __atomic_store_n(&v->qsbr_cnt[thread_id].cnt,
+ __RTE_QSBR_CNT_THR_OFFLINE, __ATOMIC_RELEASE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Acquire a lock for accessing a shared data structure.
+ *
+ * This is implemented as a lock-free function. It is multi-thread
+ * safe.
+ *
+ * This API is provided to aid debugging. This should be called before
+ * accessing a shared data structure.
+ *
+ * When CONFIG_RTE_LIBRTE_RCU_DEBUG is enabled, a lock counter is incremented.
+ * Similarly, rte_rcu_qsbr_unlock will decrement the counter. The
+ * rte_rcu_qsbr_check API will verify that this counter is 0.
+ *
+ * When CONFIG_RTE_LIBRTE_RCU_DEBUG is disabled, this API will do nothing.
+ *
+ * @param v
+ * QS variable
+ * @param thread_id
+ * Reader thread id
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_rcu_qsbr_lock(__rte_unused struct rte_rcu_qsbr *v,
+ __rte_unused unsigned int thread_id)
+{
+ RTE_ASSERT(v != NULL && thread_id < v->max_threads);
+
+#if defined(RTE_LIBRTE_RCU_DEBUG)
+ /* Increment the lock counter */
+ __atomic_fetch_add(&v->qsbr_cnt[thread_id].lock_cnt,
+ 1, __ATOMIC_ACQUIRE);
+#endif
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Release a lock after accessing a shared data structure.
+ *
+ * This is implemented as a lock-free function. It is multi-thread
+ * safe.
+ *
+ * This API is provided to aid debugging. This should be called after
+ * accessing a shared data structure.
+ *
+ * When CONFIG_RTE_LIBRTE_RCU_DEBUG is enabled, rte_rcu_qsbr_unlock will
+ * decrement a lock counter. rte_rcu_qsbr_check API will verify that this
+ * counter is 0.
+ *
+ * When CONFIG_RTE_LIBRTE_RCU_DEBUG is disabled, this API will do nothing.
+ *
+ * @param v
+ * QS variable
+ * @param thread_id
+ * Reader thread id
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_rcu_qsbr_unlock(__rte_unused struct rte_rcu_qsbr *v,
+ __rte_unused unsigned int thread_id)
+{
+ RTE_ASSERT(v != NULL && thread_id < v->max_threads);
+
+#if defined(RTE_LIBRTE_RCU_DEBUG)
+ /* Decrement the lock counter */
+ __atomic_fetch_sub(&v->qsbr_cnt[thread_id].lock_cnt,
+ 1, __ATOMIC_RELEASE);
+
+ __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, WARNING,
+ "Lock counter %u. Nested locks?\n",
+ v->qsbr_cnt[thread_id].lock_cnt);
+#endif
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Ask the reader threads to report the quiescent state
+ * status.
+ *
+ * This is implemented as a lock-free function. It is multi-thread
+ * safe and can be called from worker threads.
+ *
+ * @param v
+ * QS variable
+ * @return
+ * - This is the token for this call of the API. This should be
+ * passed to rte_rcu_qsbr_check API.
+ */
+__rte_experimental
+static __rte_always_inline uint64_t
+rte_rcu_qsbr_start(struct rte_rcu_qsbr *v)
+{
+ uint64_t t;
+
+ RTE_ASSERT(v != NULL);
+
+ /* Release the changes to the shared data structure.
+ * This store release will ensure that changes to any data
+ * structure are visible to the workers before the token
+ * update is visible.
+ */
+ t = __atomic_add_fetch(&v->token, 1, __ATOMIC_RELEASE);
+
+ return t;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Update quiescent state for a reader thread.
+ *
+ * This is implemented as a lock-free function. It is multi-thread safe.
+ * All the reader threads registered to report their quiescent state
+ * on the QS variable must call this API.
+ *
+ * @param v
+ * QS variable
+ * @param thread_id
+ * Update the quiescent state for the reader with this thread ID.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_rcu_qsbr_quiescent(struct rte_rcu_qsbr *v, unsigned int thread_id)
+{
+ uint64_t t;
+
+ RTE_ASSERT(v != NULL && thread_id < v->max_threads);
+
+ __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, ERR, "Lock counter %u\n",
+ v->qsbr_cnt[thread_id].lock_cnt);
+
+ /* Acquire the changes to the shared data structure released
+ * by rte_rcu_qsbr_start.
+ * Later loads of the shared data structure should not move
+ * above this load. Hence, use load-acquire.
+ */
+ t = __atomic_load_n(&v->token, __ATOMIC_ACQUIRE);
+
+ /* Check if there are updates available from the writer.
+ * Inform the writer that updates are visible to this reader.
+ * Prior loads of the shared data structure should not move
+ * beyond this store. Hence use store-release.
+ */
+ if (t != __atomic_load_n(&v->qsbr_cnt[thread_id].cnt, __ATOMIC_RELAXED))
+ __atomic_store_n(&v->qsbr_cnt[thread_id].cnt,
+ t, __ATOMIC_RELEASE);
+
+ __RTE_RCU_DP_LOG(DEBUG, "%s: update: token = %"PRIu64", Thread ID = %d",
+ __func__, t, thread_id);
+}
+
+/* Check the quiescent state counter for registered threads only, assuming
+ * that not all threads have registered.
+ */
+static __rte_always_inline int
+__rte_rcu_qsbr_check_selective(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
+{
+ uint32_t i, j, id;
+ uint64_t bmap;
+ uint64_t c;
+ uint64_t *reg_thread_id;
+ uint64_t acked_token = __RTE_QSBR_CNT_MAX;
+
+ for (i = 0, reg_thread_id = __RTE_QSBR_THRID_ARRAY_ELM(v, 0);
+ i < v->num_elems;
+ i++, reg_thread_id++) {
+ /* Load the current registered thread bit map before
+ * loading the reader thread quiescent state counters.
+ */
+ bmap = __atomic_load_n(reg_thread_id, __ATOMIC_ACQUIRE);
+ id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
+
+ while (bmap) {
+ j = __builtin_ctzl(bmap);
+ __RTE_RCU_DP_LOG(DEBUG,
+ "%s: check: token = %"PRIu64", wait = %d, Bit Map = 0x%"PRIx64", Thread ID = %d",
+ __func__, t, wait, bmap, id + j);
+ c = __atomic_load_n(
+ &v->qsbr_cnt[id + j].cnt,
+ __ATOMIC_ACQUIRE);
+ __RTE_RCU_DP_LOG(DEBUG,
+ "%s: status: token = %"PRIu64", wait = %d, Thread QS cnt = %"PRIu64", Thread ID = %d",
+ __func__, t, wait, c, id+j);
+
+ /* Counter is not checked for wrap-around condition
+ * as it is a 64b counter.
+ */
+ if (unlikely(c !=
+ __RTE_QSBR_CNT_THR_OFFLINE && c < t)) {
+ /* This thread is not in quiescent state */
+ if (!wait)
+ return 0;
+
+ rte_pause();
+ /* This thread might have unregistered.
+ * Re-read the bitmap.
+ */
+ bmap = __atomic_load_n(reg_thread_id,
+ __ATOMIC_ACQUIRE);
+
+ continue;
+ }
+
+ /* This thread is in quiescent state. Use the counter
+ * to find the least acknowledged token among all the
+ * readers.
+ */
+ if (c != __RTE_QSBR_CNT_THR_OFFLINE && acked_token > c)
+ acked_token = c;
+
+ bmap &= ~(1UL << j);
+ }
+ }
+
+ /* All readers are checked, update least acknowledged token.
+ * There might be multiple writers trying to update this. There is
+ * no need to update this very accurately using compare-and-swap.
+ */
+ if (acked_token != __RTE_QSBR_CNT_MAX)
+ __atomic_store_n(&v->acked_token, acked_token,
+ __ATOMIC_RELAXED);
+
+ return 1;
+}
+
+/* Check the quiescent state counter for all threads, assuming that
+ * all the threads have registered.
+ */
+static __rte_always_inline int
+__rte_rcu_qsbr_check_all(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
+{
+ uint32_t i;
+ struct rte_rcu_qsbr_cnt *cnt;
+ uint64_t c;
+ uint64_t acked_token = __RTE_QSBR_CNT_MAX;
+
+ for (i = 0, cnt = v->qsbr_cnt; i < v->max_threads; i++, cnt++) {
+ __RTE_RCU_DP_LOG(DEBUG,
+ "%s: check: token = %"PRIu64", wait = %d, Thread ID = %d",
+ __func__, t, wait, i);
+ while (1) {
+ c = __atomic_load_n(&cnt->cnt, __ATOMIC_ACQUIRE);
+ __RTE_RCU_DP_LOG(DEBUG,
+ "%s: status: token = %"PRIu64", wait = %d, Thread QS cnt = %"PRIu64", Thread ID = %d",
+ __func__, t, wait, c, i);
+
+ /* Counter is not checked for wrap-around condition
+ * as it is a 64b counter.
+ */
+ if (likely(c == __RTE_QSBR_CNT_THR_OFFLINE || c >= t))
+ break;
+
+ /* This thread is not in quiescent state */
+ if (!wait)
+ return 0;
+
+ rte_pause();
+ }
+
+ /* This thread is in quiescent state. Use the counter to find
+ * the least acknowledged token among all the readers.
+ */
+ if (likely(c != __RTE_QSBR_CNT_THR_OFFLINE && acked_token > c))
+ acked_token = c;
+ }
+
+ /* All readers are checked, update least acknowledged token.
+ * There might be multiple writers trying to update this. There is
+ * no need to update this very accurately using compare-and-swap.
+ */
+ if (acked_token != __RTE_QSBR_CNT_MAX)
+ __atomic_store_n(&v->acked_token, acked_token,
+ __ATOMIC_RELAXED);
+
+ return 1;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Checks if all the reader threads have entered the quiescent state
+ * referenced by token.
+ *
+ * This is implemented as a lock-free function. It is multi-thread
+ * safe and can be called from the worker threads as well.
+ *
+ * If this API is called with 'wait' set to true, the following
+ * factors must be considered:
+ *
+ * 1) If the calling thread is also reporting the status on the
+ * same QS variable, it must update the quiescent state status, before
+ * calling this API.
+ *
+ * 2) In addition, while calling from multiple threads, only
+ * one of those threads can be reporting the quiescent state status
+ * on a given QS variable.
+ *
+ * @param v
+ * QS variable
+ * @param t
+ * Token returned by rte_rcu_qsbr_start API
+ * @param wait
+ * If true, block till all the reader threads have completed entering
+ * the quiescent state referenced by token 't'.
+ * @return
+ * - 0 if all reader threads have NOT passed through the specified number
+ * of quiescent states.
+ * - 1 if all reader threads have passed through the specified number
+ * of quiescent states.
+ */
+__rte_experimental
+static __rte_always_inline int
+rte_rcu_qsbr_check(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
+{
+ RTE_ASSERT(v != NULL);
+
+ /* Check if all the readers have already acknowledged this token */
+ if (likely(t <= v->acked_token)) {
+ __RTE_RCU_DP_LOG(DEBUG,
+ "%s: check: token = %"PRIu64", wait = %d",
+ __func__, t, wait);
+ __RTE_RCU_DP_LOG(DEBUG,
+ "%s: status: least acked token = %"PRIu64"",
+ __func__, v->acked_token);
+ return 1;
+ }
+
+ if (likely(v->num_threads == v->max_threads))
+ return __rte_rcu_qsbr_check_all(v, t, wait);
+ else
+ return __rte_rcu_qsbr_check_selective(v, t, wait);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wait till the reader threads have entered quiescent state.
+ *
+ * This is implemented as a lock-free function. It is multi-thread safe.
+ * This API can be thought of as a wrapper around rte_rcu_qsbr_start and
+ * rte_rcu_qsbr_check APIs.
+ *
+ * If this API is called from multiple threads, only one of
+ * those threads can be reporting the quiescent state status on a
+ * given QS variable.
+ *
+ * @param v
+ * QS variable
+ * @param thread_id
+ * Thread ID of the caller if it is registered to report quiescent state
+ * on this QS variable (i.e. the calling thread is also part of the
+ * readside critical section). If not, pass RTE_QSBR_THRID_INVALID.
+ */
+__rte_experimental
+void
+rte_rcu_qsbr_synchronize(struct rte_rcu_qsbr *v, unsigned int thread_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Dump the details of a single QS variable to a file.
+ *
+ * It is NOT multi-thread safe.
+ *
+ * @param f
+ * A pointer to a file for output
+ * @param v
+ * QS variable
+ * @return
+ * On success - 0
+ * On error - 1 with error code set in rte_errno.
+ * Possible rte_errno codes are:
+ * - EINVAL - NULL parameters are passed
+ */
+__rte_experimental
+int
+rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Create a queue used to store the data structure elements that can
+ * be freed later. This queue is referred to as 'defer queue'.
+ *
+ * @param params
+ * Parameters to create a defer queue.
+ * @return
+ * On success - Valid pointer to defer queue
+ * On error - NULL
+ * Possible rte_errno codes are:
+ * - EINVAL - NULL parameters are passed
+ * - ENOMEM - Not enough memory
+ */
+__rte_experimental
+struct rte_rcu_qsbr_dq *
+rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enqueue one resource to the defer queue and start the grace period.
+ * The resource will be freed later after at least one grace period
+ * is over.
+ *
+ * If the defer queue is full, it will attempt to reclaim resources.
+ * It will also reclaim resources at regular intervals to keep
+ * the defer queue from growing too big.
+ *
+ * Multi-thread safety is provided per the defer queue configuration.
+ * When multi-thread safety is requested, it is possible that the
+ * resources are not stored in their order of deletion. This results
+ * in resources being held in the defer queue longer than they should.
+ *
+ * @param dq
+ * Defer queue to allocate an entry from.
+ * @param e
+ * Pointer to resource data to copy to the defer queue. The size of
+ * the data to copy is equal to the element size provided when the
+ * defer queue was created.
+ * @return
+ * On success - 0
+ * On error - 1 with rte_errno set to
+ * - EINVAL - NULL parameters are passed
+ * - ENOSPC - Defer queue is full. This condition cannot happen
+ * if the defer queue size is equal to (or larger than) the
+ * number of elements in the data structure.
+ */
+__rte_experimental
+int
+rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Free resources from the defer queue.
+ *
+ * This API is multi-thread safe.
+ *
+ * @param dq
+ * Defer queue to free an entry from.
+ * @param n
+ * Maximum number of resources to free.
+ * @param freed
+ * Number of resources that were freed.
+ * @param pending
+ * Number of resources pending on the defer queue. This number might not
+ * be accurate if multi-thread safety is configured.
+ * @param available
+ * Number of resources that can be added to the defer queue.
+ * This number might not be accurate if multi-thread safety is configured.
+ * @return
+ * On successful reclamation of at least 1 resource - 0
+ * On error - 1 with rte_errno set to
+ * - EINVAL - NULL parameters are passed
+ */
+__rte_experimental
+int
+rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq, unsigned int n,
+ unsigned int *freed, unsigned int *pending, unsigned int *available);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Delete a defer queue.
+ *
+ * It tries to reclaim all the resources on the defer queue.
+ * If any of the resources have not completed the grace period
+ * the reclamation stops and returns immediately. The rest of
+ * the resources are not reclaimed and the defer queue is not
+ * freed.
+ *
+ * @param dq
+ * Defer queue to delete.
+ * @return
+ * On success - 0
+ * On error - 1
+ * Possible rte_errno codes are:
+ * - EAGAIN - Some of the resources have not completed at least 1 grace
+ * period, try again.
+ */
+__rte_experimental
+int
+rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RCU_QSBR_H_ */
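The defer-queue half of the API packages the same start/check pattern for writers: rte_rcu_qsbr_dq_enqueue stores a fresh token next to the deleted element, and reclamation calls the registered callback once the corresponding grace period has elapsed. A hedged sketch of one way a writer might wire this up (the callback, queue name and sizes below are illustrative assumptions, not values taken from this patch):

#include <stdint.h>
#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_rcu_qsbr.h>

/* Illustrative free callback: 'e' points to the bytes copied at enqueue time,
 * assumed here to be a pointer to a heap-allocated object.
 */
static void
free_cb(void *p, void *e, unsigned int n)
{
	RTE_SET_USED(p);
	RTE_SET_USED(n);
	rte_free(*(void **)e);
}

/* Create a defer queue bound to an already initialized QS variable 'v'. */
static struct rte_rcu_qsbr_dq *
dq_setup(struct rte_rcu_qsbr *v)
{
	struct rte_rcu_qsbr_dq_parameters params = {
		.name = "example_dq",		/* illustrative name */
		.size = 1024,			/* max entries in the data structure */
		.esize = sizeof(void *),	/* one pointer per entry (multiple of 4B) */
		.trigger_reclaim_limit = 256,	/* auto-reclaim once this many wait */
		.max_reclaim_size = 32,		/* free at most this many per trigger */
		.free_fn = free_cb,
		.p = NULL,
		.v = v,
	};

	return rte_rcu_qsbr_dq_create(&params);
}

/* Writer path: unlink 'obj' from the shared structure, then defer its free. */
static int
writer_delete(struct rte_rcu_qsbr_dq *dq, void *obj)
{
	return rte_rcu_qsbr_dq_enqueue(dq, &obj);
}

Periodic calls to rte_rcu_qsbr_dq_reclaim (or the automatic trigger configured above) free the deferred objects, and rte_rcu_qsbr_dq_delete tears the queue down once everything has been reclaimed.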
diff --git a/src/spdk/dpdk/lib/librte_rcu/rte_rcu_version.map b/src/spdk/dpdk/lib/librte_rcu/rte_rcu_version.map
new file mode 100644
index 000000000..dfac88a37
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_rcu/rte_rcu_version.map
@@ -0,0 +1,17 @@
+EXPERIMENTAL {
+ global:
+
+ rte_rcu_log_type;
+ rte_rcu_qsbr_dump;
+ rte_rcu_qsbr_get_memsize;
+ rte_rcu_qsbr_init;
+ rte_rcu_qsbr_synchronize;
+ rte_rcu_qsbr_thread_register;
+ rte_rcu_qsbr_thread_unregister;
+ rte_rcu_qsbr_dq_create;
+ rte_rcu_qsbr_dq_enqueue;
+ rte_rcu_qsbr_dq_reclaim;
+ rte_rcu_qsbr_dq_delete;
+
+ local: *;
+};