Diffstat (limited to 'src/stick_table.c')
 src/stick_table.c | 1000 +++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 725 insertions(+), 275 deletions(-)
diff --git a/src/stick_table.c b/src/stick_table.c
index b1ce9d4..08a22e4 100644
--- a/src/stick_table.c
+++ b/src/stick_table.c
@@ -46,6 +46,18 @@
#include <haproxy/tools.h>
#include <haproxy/xxhash.h>
+#if defined(USE_PROMEX)
+#include <promex/promex.h>
+#endif
+
+/* stick table base fields */
+enum sticktable_field {
+ STICKTABLE_SIZE = 0,
+ STICKTABLE_USED,
+ /* must always be the last one */
+ STICKTABLE_TOTAL_FIELDS
+};
+
/* structure used to return a table key built from a sample */
static THREAD_LOCAL struct stktable_key static_table_key;
@@ -98,15 +110,27 @@ void __stksess_free(struct stktable *t, struct stksess *ts)
*/
void stksess_free(struct stktable *t, struct stksess *ts)
{
+ uint shard;
+ size_t len;
void *data;
+
data = stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_KEY);
if (data) {
dict_entry_unref(&server_key_dict, stktable_data_cast(data, std_t_dict));
stktable_data_cast(data, std_t_dict) = NULL;
}
- HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock);
+
+ if (t->type == SMP_T_STR)
+ len = strlen((const char *)ts->key.key);
+ else
+ len = t->key_size;
+
+ shard = stktable_calc_shard_num(t, ts->key.key, len);
+
+ /* make the compiler happy when shard is not used without threads */
+ ALREADY_CHECKED(shard);
+
__stksess_free(t, ts);
- HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock);
}
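Several of the rewritten functions above now derive a shard index from the entry's key before touching a per-shard lock. The helper itself is not part of this diff; as a rough sketch only, assuming it hashes the key with the table's XXH64 seed (set in stktable_init() further down) and reduces the result modulo the bucket count, it could look like the following, though the real definition lives in the stick_table.h header and may differ:

/* sketch only: map a key to one of the CONFIG_HAP_TBL_BUCKETS shards so that
 * lookups, inserts and expiration only contend on that shard's sh_lock
 */
static inline uint stktable_calc_shard_num(const struct stktable *t,
                                           const void *key, size_t len)
{
	return XXH64(key, len, t->hash_seed) % CONFIG_HAP_TBL_BUCKETS;
}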
/*
@@ -115,17 +139,25 @@ void stksess_free(struct stktable *t, struct stksess *ts)
*/
int __stksess_kill(struct stktable *t, struct stksess *ts)
{
+ int updt_locked = 0;
+
if (HA_ATOMIC_LOAD(&ts->ref_cnt))
return 0;
- eb32_delete(&ts->exp);
if (ts->upd.node.leaf_p) {
+ updt_locked = 1;
HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
- eb32_delete(&ts->upd);
- HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ if (HA_ATOMIC_LOAD(&ts->ref_cnt))
+ goto out_unlock;
}
+ eb32_delete(&ts->exp);
+ eb32_delete(&ts->upd);
ebmb_delete(&ts->key);
__stksess_free(t, ts);
+
+ out_unlock:
+ if (updt_locked)
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
return 1;
}
@@ -136,14 +168,26 @@ int __stksess_kill(struct stktable *t, struct stksess *ts)
*/
int stksess_kill(struct stktable *t, struct stksess *ts, int decrefcnt)
{
+ uint shard;
+ size_t len;
int ret;
if (decrefcnt && HA_ATOMIC_SUB_FETCH(&ts->ref_cnt, 1) != 0)
return 0;
- HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
+ if (t->type == SMP_T_STR)
+ len = strlen((const char *)ts->key.key);
+ else
+ len = t->key_size;
+
+ shard = stktable_calc_shard_num(t, ts->key.key, len);
+
+ /* make the compiler happy when shard is not used without threads */
+ ALREADY_CHECKED(shard);
+
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
ret = __stksess_kill(t, ts);
- HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
return ret;
}
@@ -203,6 +247,7 @@ static struct stksess *__stksess_init(struct stktable *t, struct stksess * ts)
memset((void *)ts - t->data_size, 0, t->data_size);
ts->ref_cnt = 0;
ts->shard = 0;
+ ts->seen = 0;
ts->key.node.leaf_p = NULL;
ts->exp.node.leaf_p = NULL;
ts->upd.node.leaf_p = NULL;
@@ -215,100 +260,124 @@ static struct stksess *__stksess_init(struct stktable *t, struct stksess * ts)
* Trash oldest <to_batch> sticky sessions from table <t>
* Returns number of trashed sticky sessions. It may actually trash less
* than expected if finding these requires too long a search time (e.g.
- * most of them have ts->ref_cnt>0).
+ * most of them have ts->ref_cnt>0). This function locks the table.
*/
-int __stktable_trash_oldest(struct stktable *t, int to_batch)
+int stktable_trash_oldest(struct stktable *t, int to_batch)
{
struct stksess *ts;
struct eb32_node *eb;
int max_search = to_batch * 2; // no more than 50% misses
+ int max_per_shard = (to_batch + CONFIG_HAP_TBL_BUCKETS - 1) / CONFIG_HAP_TBL_BUCKETS;
+ int done_per_shard;
int batched = 0;
- int looped = 0;
+ int updt_locked;
+ int looped;
+ int shard;
- eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK);
+ shard = 0;
while (batched < to_batch) {
+ done_per_shard = 0;
+ looped = 0;
+ updt_locked = 0;
+
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+
+ eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK);
+ while (batched < to_batch && done_per_shard < max_per_shard) {
+ if (unlikely(!eb)) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now if we
+ * have not yet visited it.
+ */
+ if (looped)
+ break;
+ looped = 1;
+ eb = eb32_first(&t->shards[shard].exps);
+ if (likely(!eb))
+ break;
+ }
- if (unlikely(!eb)) {
- /* we might have reached the end of the tree, typically because
- * <now_ms> is in the first half and we're first scanning the last
- * half. Let's loop back to the beginning of the tree now if we
- * have not yet visited it.
- */
- if (looped)
- break;
- looped = 1;
- eb = eb32_first(&t->exps);
- if (likely(!eb))
+ if (--max_search < 0)
break;
- }
- if (--max_search < 0)
- break;
+ /* timer looks expired, detach it from the queue */
+ ts = eb32_entry(eb, struct stksess, exp);
+ eb = eb32_next(eb);
- /* timer looks expired, detach it from the queue */
- ts = eb32_entry(eb, struct stksess, exp);
- eb = eb32_next(eb);
+ /* don't delete an entry which is currently referenced */
+ if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0)
+ continue;
- /* don't delete an entry which is currently referenced */
- if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0)
- continue;
+ eb32_delete(&ts->exp);
- eb32_delete(&ts->exp);
+ if (ts->expire != ts->exp.key) {
+ if (!tick_isset(ts->expire))
+ continue;
- if (ts->expire != ts->exp.key) {
- if (!tick_isset(ts->expire))
- continue;
+ ts->exp.key = ts->expire;
+ eb32_insert(&t->shards[shard].exps, &ts->exp);
- ts->exp.key = ts->expire;
- eb32_insert(&t->exps, &ts->exp);
+ /* the update might have jumped beyond the next element,
+ * possibly causing a wrapping. We need to check whether
+ * the next element should be used instead. If the next
+ * element doesn't exist it means we're on the right
+ * side and have to check the first one then. If it
+ * exists and is closer, we must use it, otherwise we
+ * use the current one.
+ */
+ if (!eb)
+ eb = eb32_first(&t->shards[shard].exps);
- /* the update might have jumped beyond the next element,
- * possibly causing a wrapping. We need to check whether
- * the next element should be used instead. If the next
- * element doesn't exist it means we're on the right
- * side and have to check the first one then. If it
- * exists and is closer, we must use it, otherwise we
- * use the current one.
- */
- if (!eb)
- eb = eb32_first(&t->exps);
+ if (!eb || tick_is_lt(ts->exp.key, eb->key))
+ eb = &ts->exp;
- if (!eb || tick_is_lt(ts->exp.key, eb->key))
- eb = &ts->exp;
+ continue;
+ }
- continue;
- }
+ /* if the entry is in the update list, we must be extremely careful
+ * because peers can see it at any moment and start to use it. Peers
+ * will take the table's updt_lock for reading when doing that, and
+ * with that lock held, will grab a ref_cnt before releasing the
+ * lock. So we must take this lock as well and check the ref_cnt.
+ */
+ if (ts->upd.node.leaf_p) {
+ if (!updt_locked) {
+ updt_locked = 1;
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ }
+ /* now we're locked, new peers can't grab it anymore,
+ * existing ones already have the ref_cnt.
+ */
+ if (HA_ATOMIC_LOAD(&ts->ref_cnt))
+ continue;
+ }
- /* session expired, trash it */
- ebmb_delete(&ts->key);
- if (ts->upd.node.leaf_p) {
- HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ /* session expired, trash it */
+ ebmb_delete(&ts->key);
eb32_delete(&ts->upd);
- HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ __stksess_free(t, ts);
+ batched++;
+ done_per_shard++;
}
- __stksess_free(t, ts);
- batched++;
- }
- return batched;
-}
+ if (updt_locked)
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
-/*
- * Trash oldest <to_batch> sticky sessions from table <t>
- * Returns number of trashed sticky sessions.
- * This function locks the table
- */
-int stktable_trash_oldest(struct stktable *t, int to_batch)
-{
- int ret;
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
- HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
- ret = __stktable_trash_oldest(t, to_batch);
- HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+ if (max_search <= 0)
+ break;
- return ret;
+ shard = (shard + 1) % CONFIG_HAP_TBL_BUCKETS;
+ if (!shard)
+ break;
+ }
+
+ return batched;
}
+
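The per-shard cap used above is simply the batch size split across buckets and rounded up: max_per_shard = (to_batch + CONFIG_HAP_TBL_BUCKETS - 1) / CONFIG_HAP_TBL_BUCKETS. For example, with 16 buckets (an illustrative value, not necessarily the build default) and to_batch = 20, each shard gives up at most (20 + 15) / 16 = 2 entries before the loop moves on to the next shard, so no single sh_lock is held for the whole purge.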
/*
* Allocate and initialise a new sticky session.
* The new sticky session is returned or NULL in case of lack of memory.
@@ -346,17 +415,17 @@ struct stksess *stksess_new(struct stktable *t, struct stktable_key *key)
}
/*
- * Looks in table <t> for a sticky session matching key <key>.
+ * Looks in table <t> for a sticky session matching key <key> in shard <shard>.
* Returns pointer on requested sticky session or NULL if none was found.
*/
-struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *key)
+struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *key, uint shard)
{
struct ebmb_node *eb;
if (t->type == SMP_T_STR)
- eb = ebst_lookup_len(&t->keys, key->key, key->key_len+1 < t->key_size ? key->key_len : t->key_size-1);
+ eb = ebst_lookup_len(&t->shards[shard].keys, key->key, key->key_len + 1 < t->key_size ? key->key_len : t->key_size - 1);
else
- eb = ebmb_lookup(&t->keys, key->key, t->key_size);
+ eb = ebmb_lookup(&t->shards[shard].keys, key->key, t->key_size);
if (unlikely(!eb)) {
/* no session found */
@@ -375,12 +444,60 @@ struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *k
struct stksess *stktable_lookup_key(struct stktable *t, struct stktable_key *key)
{
struct stksess *ts;
+ uint shard;
+ size_t len;
+
+ if (t->type == SMP_T_STR)
+ len = key->key_len + 1 < t->key_size ? key->key_len : t->key_size - 1;
+ else
+ len = t->key_size;
- HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock);
- ts = __stktable_lookup_key(t, key);
+ shard = stktable_calc_shard_num(t, key->key, len);
+
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+ ts = __stktable_lookup_key(t, key, shard);
if (ts)
HA_ATOMIC_INC(&ts->ref_cnt);
- HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock);
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+
+ return ts;
+}
+
+/*
+ * Looks in table <t> for a sticky session matching ptr <ptr>.
+ * Returns pointer on requested sticky session or NULL if none was found.
+ * The refcount of the found entry is increased and this function
+ * is protected using the table lock
+ */
+struct stksess *stktable_lookup_ptr(struct stktable *t, void *ptr)
+{
+ struct stksess *ts = NULL;
+ struct ebmb_node *eb;
+ int shard;
+
+ for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) {
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+ /* linear search is performed, this could be optimized by adding
+ * an eb node dedicated to ptr lookups into stksess struct to
+ * leverage eb_lookup function instead.
+ */
+ eb = ebmb_first(&t->shards[shard].keys);
+ while (eb) {
+ struct stksess *cur;
+
+ cur = ebmb_entry(eb, struct stksess, key);
+ if (cur == ptr) {
+ ts = cur;
+ break;
+ }
+ eb = ebmb_next(eb);
+ }
+ if (ts)
+ HA_ATOMIC_INC(&ts->ref_cnt);
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+ if (ts)
+ return ts;
+ }
return ts;
}
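This linear, all-shards scan exists to back the new "ptr" variants of the CLI commands added later in this patch (table_process_entry_per_ptr() and the "show/clear/set table <t> ptr <ptr>" forms): only the entry's address is known there, so each shard's key tree is walked until the matching stksess is found.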
@@ -389,14 +506,14 @@ struct stksess *stktable_lookup_key(struct stktable *t, struct stktable_key *key
* Looks in table <t> for a sticky session with same key as <ts>.
* Returns pointer on requested sticky session or NULL if none was found.
*/
-struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts)
+struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts, uint shard)
{
struct ebmb_node *eb;
if (t->type == SMP_T_STR)
- eb = ebst_lookup(&(t->keys), (char *)ts->key.key);
+ eb = ebst_lookup(&t->shards[shard].keys, (char *)ts->key.key);
else
- eb = ebmb_lookup(&(t->keys), ts->key.key, t->key_size);
+ eb = ebmb_lookup(&t->shards[shard].keys, ts->key.key, t->key_size);
if (unlikely(!eb))
return NULL;
@@ -413,12 +530,21 @@ struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts)
struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts)
{
struct stksess *lts;
+ uint shard;
+ size_t len;
+
+ if (t->type == SMP_T_STR)
+ len = strlen((const char *)ts->key.key);
+ else
+ len = t->key_size;
+
+ shard = stktable_calc_shard_num(t, ts->key.key, len);
- HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock);
- lts = __stktable_lookup(t, ts);
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+ lts = __stktable_lookup(t, ts, shard);
if (lts)
HA_ATOMIC_INC(&lts->ref_cnt);
- HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock);
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
return lts;
}
@@ -428,7 +554,7 @@ struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts)
* The node will be also inserted into the update tree if needed, at a position
* depending if the update is a local or coming from a remote node.
* If <decrefcnt> is set, the ts entry's ref_cnt will be decremented. The table's
- * write lock may be taken.
+ * updt_lock may be taken for writes.
*/
void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, int expire, int decrefcnt)
{
@@ -444,39 +570,18 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local,
/* If sync is enabled */
if (t->sync_task) {
- try_lock_again:
- /* We'll need to reliably check that the entry is in the tree.
- * It's only inserted/deleted using a write lock so a read lock
- * is sufficient to verify this. We may then need to upgrade it
- * to perform an update (which is rare under load), and if the
- * upgrade fails, we'll try again with a write lock directly.
- */
- if (use_wrlock)
- HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
- else
- HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->updt_lock);
-
if (local) {
/* Check if this entry is not in the tree or not
* scheduled for at least one peer.
*/
- if (!ts->upd.node.leaf_p
- || (int)(t->commitupdate - ts->upd.key) >= 0
- || (int)(ts->upd.key - t->localupdate) >= 0) {
- /* Time to upgrade the read lock to write lock if needed */
- if (!use_wrlock) {
- if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &t->updt_lock) != 0) {
- /* failed, try again */
- HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
- use_wrlock = 1;
- goto try_lock_again;
- }
- HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &t->updt_lock);
- use_wrlock = 1;
- }
+ if (!ts->upd.node.leaf_p || _HA_ATOMIC_LOAD(&ts->seen)) {
+ /* Time to upgrade the read lock to write lock */
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ use_wrlock = 1;
/* here we're write-locked */
+ ts->seen = 0;
ts->upd.key = ++t->update;
t->localupdate = t->update;
eb32_delete(&ts->upd);
@@ -489,28 +594,30 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local,
do_wakeup = 1;
}
else {
- /* If this entry is not in the tree */
-
+ /* Note: we land here when learning new entries from
+ * remote peers. We hold one ref_cnt so the entry
+ * cannot vanish under us, however if two peers create
+ * the same key at the exact same time, we must be
+ * careful not to perform two parallel inserts! Hence
+ * we need to first check leaf_p to know if the entry
+ * is new, then lock the tree and check the entry again
+ * (since another thread could have created it in the
+ * mean time).
+ */
if (!ts->upd.node.leaf_p) {
/* Time to upgrade the read lock to write lock if needed */
- if (!use_wrlock) {
- if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &t->updt_lock) != 0) {
- /* failed, try again */
- HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
- use_wrlock = 1;
- goto try_lock_again;
- }
- HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &t->updt_lock);
- use_wrlock = 1;
- }
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ use_wrlock = 1;
/* here we're write-locked */
-
- ts->upd.key= (++t->update)+(2147483648U);
- eb = eb32_insert(&t->updates, &ts->upd);
- if (eb != &ts->upd) {
- eb32_delete(eb);
- eb32_insert(&t->updates, &ts->upd);
+ if (!ts->upd.node.leaf_p) {
+ ts->seen = 0;
+ ts->upd.key= (++t->update)+(2147483648U);
+ eb = eb32_insert(&t->updates, &ts->upd);
+ if (eb != &ts->upd) {
+ eb32_delete(eb);
+ eb32_insert(&t->updates, &ts->upd);
+ }
}
}
}
@@ -518,8 +625,6 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local,
/* drop the lock now */
if (use_wrlock)
HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
- else
- HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
}
if (decrefcnt)
@@ -569,14 +674,14 @@ static void stktable_release(struct stktable *t, struct stksess *ts)
* is set. <ts> is returned if properly inserted, otherwise the one already
* present if any.
*/
-struct stksess *__stktable_store(struct stktable *t, struct stksess *ts)
+struct stksess *__stktable_store(struct stktable *t, struct stksess *ts, uint shard)
{
struct ebmb_node *eb;
- eb = ebmb_insert(&t->keys, &ts->key, t->key_size);
+ eb = ebmb_insert(&t->shards[shard].keys, &ts->key, t->key_size);
if (likely(eb == &ts->key)) {
ts->exp.key = ts->expire;
- eb32_insert(&t->exps, &ts->exp);
+ eb32_insert(&t->shards[shard].exps, &ts->exp);
}
return ebmb_entry(eb, struct stksess, key); // most commonly this is <ts>
}
@@ -621,11 +726,24 @@ void stktable_requeue_exp(struct stktable *t, const struct stksess *ts)
struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key *key)
{
struct stksess *ts, *ts2;
+ uint shard;
+ size_t len;
if (!key)
return NULL;
- ts = stktable_lookup_key(table, key);
+ if (table->type == SMP_T_STR)
+ len = key->key_len + 1 < table->key_size ? key->key_len : table->key_size - 1;
+ else
+ len = table->key_size;
+
+ shard = stktable_calc_shard_num(table, key->key, len);
+
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock);
+ ts = __stktable_lookup_key(table, key, shard);
+ if (ts)
+ HA_ATOMIC_INC(&ts->ref_cnt);
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock);
if (ts)
return ts;
@@ -645,12 +763,12 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key *
* one we find.
*/
- HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->lock);
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock);
- ts2 = __stktable_store(table, ts);
+ ts2 = __stktable_store(table, ts, shard);
HA_ATOMIC_INC(&ts2->ref_cnt);
- HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock);
if (unlikely(ts2 != ts)) {
/* another entry was added in the mean time, let's
@@ -671,12 +789,21 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key *
struct stksess *stktable_set_entry(struct stktable *table, struct stksess *nts)
{
struct stksess *ts;
+ uint shard;
+ size_t len;
+
+ if (table->type == SMP_T_STR)
+ len = strlen((const char *)nts->key.key);
+ else
+ len = table->key_size;
+
+ shard = stktable_calc_shard_num(table, nts->key.key, len);
- HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->lock);
- ts = __stktable_lookup(table, nts);
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock);
+ ts = __stktable_lookup(table, nts, shard);
if (ts) {
HA_ATOMIC_INC(&ts->ref_cnt);
- HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->lock);
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock);
return ts;
}
ts = nts;
@@ -684,18 +811,18 @@ struct stksess *stktable_set_entry(struct stktable *table, struct stksess *nts)
/* let's increment it before switching to exclusive */
HA_ATOMIC_INC(&ts->ref_cnt);
- if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &table->lock) != 0) {
+ if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &table->shards[shard].sh_lock) != 0) {
/* upgrade to seek lock failed, let's drop and take */
- HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->lock);
- HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->lock);
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock);
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock);
}
else
- HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &table->lock);
+ HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &table->shards[shard].sh_lock);
/* now we're write-locked */
- __stktable_store(table, ts);
- HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock);
+ __stktable_store(table, ts, shard);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock);
stktable_requeue_exp(table, ts);
return ts;
@@ -710,87 +837,117 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
struct stktable *t = context;
struct stksess *ts;
struct eb32_node *eb;
- int updt_locked = 0;
- int looped = 0;
+ int updt_locked;
+ int looped;
int exp_next;
+ int task_exp;
+ int shard;
+
+ task_exp = TICK_ETERNITY;
+
+ for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) {
+ updt_locked = 0;
+ looped = 0;
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+ eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK);
+
+ while (1) {
+ if (unlikely(!eb)) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now if we
+ * have not yet visited it.
+ */
+ if (looped)
+ break;
+ looped = 1;
+ eb = eb32_first(&t->shards[shard].exps);
+ if (likely(!eb))
+ break;
+ }
- HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
- eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK);
-
- while (1) {
- if (unlikely(!eb)) {
- /* we might have reached the end of the tree, typically because
- * <now_ms> is in the first half and we're first scanning the last
- * half. Let's loop back to the beginning of the tree now if we
- * have not yet visited it.
- */
- if (looped)
- break;
- looped = 1;
- eb = eb32_first(&t->exps);
- if (likely(!eb))
- break;
- }
-
- if (likely(tick_is_lt(now_ms, eb->key))) {
- /* timer not expired yet, revisit it later */
- exp_next = eb->key;
- goto out_unlock;
- }
+ if (likely(tick_is_lt(now_ms, eb->key))) {
+ /* timer not expired yet, revisit it later */
+ exp_next = eb->key;
+ goto out_unlock;
+ }
- /* timer looks expired, detach it from the queue */
- ts = eb32_entry(eb, struct stksess, exp);
- eb = eb32_next(eb);
+ /* timer looks expired, detach it from the queue */
+ ts = eb32_entry(eb, struct stksess, exp);
+ eb = eb32_next(eb);
- /* don't delete an entry which is currently referenced */
- if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0)
- continue;
+ /* don't delete an entry which is currently referenced */
+ if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0)
+ continue;
- eb32_delete(&ts->exp);
+ eb32_delete(&ts->exp);
- if (!tick_is_expired(ts->expire, now_ms)) {
- if (!tick_isset(ts->expire))
- continue;
+ if (!tick_is_expired(ts->expire, now_ms)) {
+ if (!tick_isset(ts->expire))
+ continue;
- ts->exp.key = ts->expire;
- eb32_insert(&t->exps, &ts->exp);
+ ts->exp.key = ts->expire;
+ eb32_insert(&t->shards[shard].exps, &ts->exp);
- /* the update might have jumped beyond the next element,
- * possibly causing a wrapping. We need to check whether
- * the next element should be used instead. If the next
- * element doesn't exist it means we're on the right
- * side and have to check the first one then. If it
- * exists and is closer, we must use it, otherwise we
- * use the current one.
- */
- if (!eb)
- eb = eb32_first(&t->exps);
+ /* the update might have jumped beyond the next element,
+ * possibly causing a wrapping. We need to check whether
+ * the next element should be used instead. If the next
+ * element doesn't exist it means we're on the right
+ * side and have to check the first one then. If it
+ * exists and is closer, we must use it, otherwise we
+ * use the current one.
+ */
+ if (!eb)
+ eb = eb32_first(&t->shards[shard].exps);
- if (!eb || tick_is_lt(ts->exp.key, eb->key))
- eb = &ts->exp;
- continue;
- }
+ if (!eb || tick_is_lt(ts->exp.key, eb->key))
+ eb = &ts->exp;
+ continue;
+ }
- /* session expired, trash it */
- ebmb_delete(&ts->key);
- if (ts->upd.node.leaf_p) {
- if (!updt_locked) {
- updt_locked = 1;
- HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ /* if the entry is in the update list, we must be extremely careful
+ * because peers can see it at any moment and start to use it. Peers
+ * will take the table's updt_lock for reading when doing that, and
+ * with that lock held, will grab a ref_cnt before releasing the
+ * lock. So we must take this lock as well and check the ref_cnt.
+ */
+ if (ts->upd.node.leaf_p) {
+ if (!updt_locked) {
+ updt_locked = 1;
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ }
+ /* now we're locked, new peers can't grab it anymore,
+ * existing ones already have the ref_cnt.
+ */
+ if (HA_ATOMIC_LOAD(&ts->ref_cnt))
+ continue;
}
+
+ /* session expired, trash it */
+ ebmb_delete(&ts->key);
eb32_delete(&ts->upd);
+ __stksess_free(t, ts);
}
- __stksess_free(t, ts);
- }
- /* We have found no task to expire in any tree */
- exp_next = TICK_ETERNITY;
+ /* We have found no task to expire in any tree */
+ exp_next = TICK_ETERNITY;
-out_unlock:
- task->expire = exp_next;
- if (updt_locked)
- HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ out_unlock:
+ if (updt_locked)
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
+
+ task_exp = tick_first(task_exp, exp_next);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+ }
+
+ /* Reset the task's expiration. We do this under the lock so as not
+ * to ruin a call to task_queue() in stktable_requeue_exp() if we
+ * were to update with TICK_ETERNITY.
+ */
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
+ task->expire = task_exp;
HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+
return task;
}
@@ -803,12 +960,17 @@ out_unlock:
int stktable_init(struct stktable *t, char **err_msg)
{
int peers_retval = 0;
+ int shard;
t->hash_seed = XXH64(t->id, t->idlen, 0);
if (t->size) {
- t->keys = EB_ROOT_UNIQUE;
- memset(&t->exps, 0, sizeof(t->exps));
+ for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) {
+ t->shards[shard].keys = EB_ROOT_UNIQUE;
+ memset(&t->shards[shard].exps, 0, sizeof(t->shards[shard].exps));
+ HA_RWLOCK_INIT(&t->shards[shard].sh_lock);
+ }
+
t->updates = EB_ROOT_UNIQUE;
HA_RWLOCK_INIT(&t->lock);
@@ -1402,6 +1564,8 @@ struct stktable_data_type stktable_data_types[STKTABLE_DATA_TYPES] = {
[STKTABLE_DT_GPT] = { .name = "gpt", .std_type = STD_T_UINT, .is_array = 1, .as_is = 1 },
[STKTABLE_DT_GPC] = { .name = "gpc", .std_type = STD_T_UINT, .is_array = 1 },
[STKTABLE_DT_GPC_RATE] = { .name = "gpc_rate", .std_type = STD_T_FRQP, .is_array = 1, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_GLITCH_CNT] = { .name = "glitch_cnt", .std_type = STD_T_UINT },
+ [STKTABLE_DT_GLITCH_RATE] = { .name = "glitch_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
};
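The two new data types are enabled from the configuration through the usual "store" directive of a stick-table declaration; glitch_rate takes a period argument since it is declared with ARG_T_DELAY. A hypothetical declaration (table parameters invented for illustration):

    stick-table type ip size 100k expire 30m store glitch_cnt,glitch_rate(10s)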
/* Registers stick-table extra data type with index <idx>, name <name>, type
@@ -1741,6 +1905,79 @@ static int sample_conv_table_bytes_out_rate(const struct arg *arg_p, struct samp
return !!ptr;
}
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the cumulated number of front glitches for the
+ * key if the key is present in the table, otherwise zero, so that comparisons
+ * can be easily performed. If the inspected parameter is not stored in the
+ * table, <not found> is returned.
+ */
+static int sample_conv_table_glitch_cnt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GLITCH_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the front glitch rate of the key if the key is
+ * present in the table, otherwise zero, so that comparisons can be easily
+ * performed. If the inspected parameter is not stored in the table, <not found>
+ * is returned.
+ */
+static int sample_conv_table_glitch_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GLITCH_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_GLITCH_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
/* Casts sample <smp> to the type of the table specified in arg_p(1), and looks
* it up into this table. Returns the value of the GPT[arg_p(0)] tag for the key
* if the key is present in the table, otherwise false, so that comparisons can
@@ -4218,6 +4455,85 @@ smp_fetch_sc_conn_cur(const struct arg *args, struct sample *smp, const char *kw
return 1;
}
+/* set <smp> to the cumulated number of glitches from the stream or session's
+ * tracked frontend counters. Supports being called as "sc[0-9]_glitch_cnt" or
+ * "src_glitch_cnt" only.
+ */
+static int
+smp_fetch_sc_glitch_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GLITCH_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the rate of glitches from the stream or session's tracked
+ * frontend counters. Supports being called as "sc[0-9]_glitch_rate" or
+ * "src_glitch_rate" only.
+ */
+static int
+smp_fetch_sc_glitch_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GLITCH_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_GLITCH_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
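Like the other sc_* fetches, these operate on a tracked entry. A hedged configuration sketch (frontend and table names are invented, and the table is assumed to store glitch_rate as sketched earlier):

    frontend fe_main
        tcp-request connection track-sc0 src table st_glitch
        http-request deny if { sc0_glitch_rate gt 100 }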
/* set <smp> to the cumulated number of streams from the stream's tracked
* frontend counters. Supports being called as "sc[0-9]_sess_cnt" or
* "src_sess_cnt" only.
@@ -4885,6 +5201,7 @@ struct show_table_ctx {
void *target; /* table we want to dump, or NULL for all */
struct stktable *t; /* table being currently dumped (first if NULL) */
struct stksess *entry; /* last entry we were trying to dump (or first if NULL) */
+ int tree_head; /* tree head currently being visited */
long long value[STKTABLE_FILTER_LEN]; /* value to compare against */
signed char data_type[STKTABLE_FILTER_LEN]; /* type of data to compare, or -1 if none */
signed char data_op[STKTABLE_FILTER_LEN]; /* operator (STD_OP_*) when data_type set */
@@ -4896,39 +5213,22 @@ struct show_table_ctx {
char action; /* action on the table : one of STK_CLI_ACT_* */
};
-/* Processes a single table entry matching a specific key passed in argument.
- * returns 0 if wants to be called again, 1 if has ended processing.
+/* Processes a single table entry <ts>.
+ * returns 0 if it wants to be called again, 1 if it has ended processing.
*/
-static int table_process_entry_per_key(struct appctx *appctx, char **args)
+static int table_process_entry(struct appctx *appctx, struct stksess *ts, char **args)
{
struct show_table_ctx *ctx = appctx->svcctx;
struct stktable *t = ctx->target;
- struct stksess *ts;
- struct sample key;
long long value;
int data_type;
int cur_arg;
void *ptr;
struct freq_ctr *frqp;
- if (!*args[4])
- return cli_err(appctx, "Key value expected\n");
-
- memset(&key, 0, sizeof(key));
- key.data.type = SMP_T_STR;
- key.data.u.str.area = args[4];
- key.data.u.str.data = strlen(args[4]);
-
switch (t->type) {
case SMP_T_IPV4:
case SMP_T_IPV6:
- /* prefer input format over table type when parsing ip addresses,
- * then let smp_to_stkey() do the conversion for us when needed
- */
- BUG_ON(!sample_casts[key.data.type][SMP_T_ADDR]);
- if (!sample_casts[key.data.type][SMP_T_ADDR](&key))
- return cli_err(appctx, "Invalid key\n");
- break;
case SMP_T_SINT:
case SMP_T_STR:
break;
@@ -4945,21 +5245,15 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args)
}
}
- /* try to convert key according to table type
- * (it will fill static_table_key on success)
- */
- if (!smp_to_stkey(&key, t))
- return cli_err(appctx, "Invalid key\n");
-
/* check permissions */
if (!cli_has_level(appctx, ACCESS_LVL_OPER))
return 1;
+ if (!ts)
+ return 1;
+
switch (ctx->action) {
case STK_CLI_ACT_SHOW:
- ts = stktable_lookup_key(t, &static_table_key);
- if (!ts)
- return 1;
chunk_reset(&trash);
if (!table_dump_head_to_buffer(&trash, appctx, t, t)) {
stktable_release(t, ts);
@@ -4976,10 +5270,6 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args)
break;
case STK_CLI_ACT_CLR:
- ts = stktable_lookup_key(t, &static_table_key);
- if (!ts)
- return 1;
-
if (!stksess_kill(t, ts, 1)) {
/* don't delete an entry which is currently referenced */
return cli_err(appctx, "Entry currently in use, cannot remove\n");
@@ -4987,11 +5277,6 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args)
break;
case STK_CLI_ACT_SET:
- ts = stktable_get_entry(t, &static_table_key);
- if (!ts) {
- /* don't delete an entry which is currently referenced */
- return cli_err(appctx, "Unable to allocate a new entry\n");
- }
HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
for (cur_arg = 5; *args[cur_arg]; cur_arg += 2) {
if (strncmp(args[cur_arg], "data.", 5) != 0) {
@@ -5023,7 +5308,7 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args)
return 1;
}
- ptr = stktable_data_ptr(t, ts, data_type);
+ ptr = __stktable_data_ptr(t, ts, data_type);
switch (stktable_data_types[data_type].std_type) {
case STD_T_SINT:
@@ -5060,6 +5345,82 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args)
return cli_err(appctx, "Unknown action\n");
}
return 1;
+
+}
+
+/* Processes a single table entry matching a specific key passed in argument.
+ * returns 0 if it wants to be called again, 1 if it has ended processing.
+ */
+static int table_process_entry_per_key(struct appctx *appctx, char **args)
+{
+ struct show_table_ctx *ctx = appctx->svcctx;
+ struct stktable *t = ctx->target;
+ struct stksess *ts;
+ struct sample key;
+
+ if (!*args[4])
+ return cli_err(appctx, "Key value expected\n");
+
+ memset(&key, 0, sizeof(key));
+ key.data.type = SMP_T_STR;
+ key.data.u.str.area = args[4];
+ key.data.u.str.data = strlen(args[4]);
+
+ switch (t->type) {
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ /* prefer input format over table type when parsing ip addresses,
+ * then let smp_to_stkey() do the conversion for us when needed
+ */
+ BUG_ON(!sample_casts[key.data.type][SMP_T_ADDR]);
+ if (!sample_casts[key.data.type][SMP_T_ADDR](&key))
+ return cli_err(appctx, "Invalid key\n");
+ break;
+ default:
+ /* nothing to do */
+ break;
+ }
+
+ /* try to convert key according to table type
+ * (it will fill static_table_key on success)
+ */
+ if (!smp_to_stkey(&key, t))
+ return cli_err(appctx, "Invalid key\n");
+
+ if (ctx->action == STK_CLI_ACT_SET) {
+ ts = stktable_get_entry(t, &static_table_key);
+ if (!ts)
+ return cli_err(appctx, "Unable to allocate a new entry\n");
+ } else
+ ts = stktable_lookup_key(t, &static_table_key);
+
+ return table_process_entry(appctx, ts, args);
+}
+
+/* Processes a single table entry matching a specific ptr passed in argument.
+ * returns 0 if it wants to be called again, 1 if it has ended processing.
+ */
+static int table_process_entry_per_ptr(struct appctx *appctx, char **args)
+{
+ struct show_table_ctx *ctx = appctx->svcctx;
+ struct stktable *t = ctx->target;
+ ulong ptr;
+ char *error;
+ struct stksess *ts;
+
+ if (!*args[4] || args[4][0] != '0' || args[4][1] != 'x')
+ return cli_err(appctx, "Pointer expected (0xffff notation)\n");
+
+ /* Convert argument to integer value */
+ ptr = strtoul(args[4], &error, 16);
+ if (*error != '\0')
+ return cli_err(appctx, "Malformed ptr.\n");
+
+ ts = stktable_lookup_ptr(t, (void *)ptr);
+ if (!ts)
+ return cli_err(appctx, "No entry can be found matching ptr.\n");
+
+ return table_process_entry(appctx, ts, args);
}
/* Prepares the appctx fields with the data-based filters from the command line.
@@ -5127,6 +5488,8 @@ static int cli_parse_table_req(char **args, char *payload, struct appctx *appctx
if (strcmp(args[3], "key") == 0)
return table_process_entry_per_key(appctx, args);
+ if (strcmp(args[3], "ptr") == 0)
+ return table_process_entry_per_ptr(appctx, args);
else if (strncmp(args[3], "data.", 5) == 0)
return table_prepare_data_request(appctx, args);
else if (*args[3])
@@ -5137,11 +5500,11 @@ static int cli_parse_table_req(char **args, char *payload, struct appctx *appctx
err_args:
switch (ctx->action) {
case STK_CLI_ACT_SHOW:
- return cli_err(appctx, "Optional argument only supports \"data.<store_data_type>\" <operator> <value> and key <key>\n");
+ return cli_err(appctx, "Optional argument only supports \"data.<store_data_type>\" <operator> <value> or key <key> or ptr <ptr>\n");
case STK_CLI_ACT_CLR:
- return cli_err(appctx, "Required arguments: <table> \"data.<store_data_type>\" <operator> <value> or <table> key <key>\n");
+ return cli_err(appctx, "Required arguments: <table> \"data.<store_data_type>\" <operator> <value> or <table> key <key> or <table> ptr <ptr>\n");
case STK_CLI_ACT_SET:
- return cli_err(appctx, "Required arguments: <table> key <key> [data.<store_data_type> <value>]*\n");
+ return cli_err(appctx, "Required arguments: <table> key <key> [data.<store_data_type> <value>]* or <table> ptr <ptr> [data.<store_data_type> <value>]*\n");
default:
return cli_err(appctx, "Unknown action\n");
}
@@ -5159,6 +5522,7 @@ static int cli_io_handler_table(struct appctx *appctx)
struct ebmb_node *eb;
int skip_entry;
int show = ctx->action == STK_CLI_ACT_SHOW;
+ int shard = ctx->tree_head;
/*
* We have 3 possible states in ctx->state :
@@ -5170,14 +5534,6 @@ static int cli_io_handler_table(struct appctx *appctx)
* - STATE_DONE : nothing left to dump, the buffer may contain some
* data though.
*/
- /* FIXME: Don't watch the other side !*/
- if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) {
- /* in case of abort, remove any refcount we might have set on an entry */
- if (ctx->state == STATE_DUMP) {
- stksess_kill_if_expired(ctx->t, ctx->entry, 1);
- }
- return 1;
- }
chunk_reset(&trash);
@@ -5192,22 +5548,30 @@ static int cli_io_handler_table(struct appctx *appctx)
}
if (ctx->t->size) {
- if (show && !table_dump_head_to_buffer(&trash, appctx, ctx->t, ctx->target))
+ if (show && !shard && !table_dump_head_to_buffer(&trash, appctx, ctx->t, ctx->target))
return 0;
if (ctx->target &&
(strm_li(s)->bind_conf->level & ACCESS_LVL_MASK) >= ACCESS_LVL_OPER) {
/* dump entries only if table explicitly requested */
- HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->lock);
- eb = ebmb_first(&ctx->t->keys);
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock);
+ eb = ebmb_first(&ctx->t->shards[shard].keys);
if (eb) {
ctx->entry = ebmb_entry(eb, struct stksess, key);
HA_ATOMIC_INC(&ctx->entry->ref_cnt);
ctx->state = STATE_DUMP;
- HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock);
break;
}
- HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock);
+
+ /* we come here if we didn't find any entry in this shard */
+ shard = ++ctx->tree_head;
+ if (shard < CONFIG_HAP_TBL_BUCKETS)
+ break; // try again on new shard
+
+ /* fall through next table */
+ shard = ctx->tree_head = 0;
}
}
ctx->t = ctx->t->next;
@@ -5275,7 +5639,7 @@ static int cli_io_handler_table(struct appctx *appctx)
HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ctx->entry->lock);
- HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock);
HA_ATOMIC_DEC(&ctx->entry->ref_cnt);
eb = ebmb_next(&ctx->entry->key);
@@ -5287,7 +5651,7 @@ static int cli_io_handler_table(struct appctx *appctx)
else if (!skip_entry && !ctx->entry->ref_cnt)
__stksess_kill(ctx->t, old);
HA_ATOMIC_INC(&ctx->entry->ref_cnt);
- HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock);
break;
}
@@ -5297,9 +5661,13 @@ static int cli_io_handler_table(struct appctx *appctx)
else if (!skip_entry && !HA_ATOMIC_LOAD(&ctx->entry->ref_cnt))
__stksess_kill(ctx->t, ctx->entry);
- HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock);
- ctx->t = ctx->t->next;
+ shard = ++ctx->tree_head;
+ if (shard >= CONFIG_HAP_TBL_BUCKETS) {
+ shard = ctx->tree_head = 0;
+ ctx->t = ctx->t->next;
+ }
ctx->state = STATE_NEXT;
break;
@@ -5481,6 +5849,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, {
{ "sc_get_gpc", smp_fetch_sc_get_gpc, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc_get_gpc0", smp_fetch_sc_get_gpc0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc_get_gpc1", smp_fetch_sc_get_gpc1, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "sc_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_glitch_rate", smp_fetch_sc_glitch_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc_gpc_rate", smp_fetch_sc_gpc_rate, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
@@ -5509,6 +5879,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, {
{ "sc0_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc0_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc0_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc0_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc0_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc0_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
@@ -5536,6 +5908,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, {
{ "sc1_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc1_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc1_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc1_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc1_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc1_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
@@ -5562,6 +5936,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, {
{ "sc2_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc2_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc2_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc2_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc2_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
{ "sc2_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
@@ -5591,6 +5967,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, {
{ "src_get_gpc", smp_fetch_sc_get_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
{ "src_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
{ "src_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
{ "src_gpc_rate", smp_fetch_sc_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
{ "src_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
{ "src_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
@@ -5632,6 +6010,8 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, {
{ "table_gpc_rate", sample_conv_table_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
{ "table_gpc0_rate", sample_conv_table_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
{ "table_gpc1_rate", sample_conv_table_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_glitch_cnt", sample_conv_table_glitch_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_glitch_rate", sample_conv_table_glitch_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
{ "table_http_err_cnt", sample_conv_table_http_err_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
{ "table_http_err_rate", sample_conv_table_http_err_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
{ "table_http_fail_cnt", sample_conv_table_http_fail_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
@@ -5656,3 +6036,73 @@ static struct cfg_kw_list cfg_kws = {{ },{
}};
INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+
+#if defined(USE_PROMEX)
+
+static int stk_promex_metric_info(unsigned int id, struct promex_metric *metric, struct ist *desc)
+{
+ switch (id) {
+ case STICKTABLE_SIZE:
+ *metric = (struct promex_metric){ .n = ist("size"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC };
+ *desc = ist("Stick table size.");
+ break;
+ case STICKTABLE_USED:
+ *metric = (struct promex_metric){ .n = ist("used"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC };
+ *desc = ist("Number of entries used in this stick table.");
+ break;
+ default:
+ return -1;
+ }
+ return 1;
+}
+
+static void *stk_promex_start_ts(void *unused, unsigned int id)
+{
+ return stktables_list;
+}
+
+static void *stk_promex_next_ts(void *unused, void *metric_ctx, unsigned int id)
+{
+ struct stktable *t = metric_ctx;
+
+ return t->next;
+}
+
+static int stk_promex_fill_ts(void *unused, void *metric_ctx, unsigned int id, struct promex_label *labels, struct field *field)
+{
+ struct stktable *t = metric_ctx;
+
+ if (!t->size)
+ return 0;
+
+ labels[0].name = ist("name");
+ labels[0].value = ist(t->id);
+ labels[1].name = ist("type");
+ labels[1].value = ist(stktable_types[t->type].kw);
+
+ switch (id) {
+ case STICKTABLE_SIZE:
+ *field = mkf_u32(FN_GAUGE, t->size);
+ break;
+ case STICKTABLE_USED:
+ *field = mkf_u32(FN_GAUGE, t->current);
+ break;
+ default:
+ return -1;
+ }
+ return 1;
+}
+
+static struct promex_module promex_sticktable_module = {
+ .name = IST("sticktable"),
+ .metric_info = stk_promex_metric_info,
+ .start_ts = stk_promex_start_ts,
+ .next_ts = stk_promex_next_ts,
+ .fill_ts = stk_promex_fill_ts,
+ .nb_metrics = STICKTABLE_TOTAL_FIELDS,
+};
+
+INITCALL1(STG_REGISTER, promex_register_module, &promex_sticktable_module);
+
+#endif
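When HAProxy is built with USE_PROMEX, the module above exposes the two fields once per configured table. Assuming the exporter's usual haproxy_<module>_<metric> naming, a scrape would be expected to contain lines such as the following (table name and values are illustrative):

    # HELP haproxy_sticktable_size Stick table size.
    # TYPE haproxy_sticktable_size gauge
    haproxy_sticktable_size{name="st_glitch",type="ip"} 102400
    # HELP haproxy_sticktable_used Number of entries used in this stick table.
    # TYPE haproxy_sticktable_used gauge
    haproxy_sticktable_used{name="st_glitch",type="ip"} 37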