 src/stick_table.c | 1000 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 725 insertions(+), 275 deletions(-)
diff --git a/src/stick_table.c b/src/stick_table.c index b1ce9d4..08a22e4 100644 --- a/src/stick_table.c +++ b/src/stick_table.c @@ -46,6 +46,18 @@ #include <haproxy/tools.h> #include <haproxy/xxhash.h> +#if defined(USE_PROMEX) +#include <promex/promex.h> +#endif + +/* stick table base fields */ +enum sticktable_field { + STICKTABLE_SIZE = 0, + STICKTABLE_USED, + /* must always be the last one */ + STICKTABLE_TOTAL_FIELDS +}; + /* structure used to return a table key built from a sample */ static THREAD_LOCAL struct stktable_key static_table_key; @@ -98,15 +110,27 @@ void __stksess_free(struct stktable *t, struct stksess *ts) */ void stksess_free(struct stktable *t, struct stksess *ts) { + uint shard; + size_t len; void *data; + data = stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_KEY); if (data) { dict_entry_unref(&server_key_dict, stktable_data_cast(data, std_t_dict)); stktable_data_cast(data, std_t_dict) = NULL; } - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); + + if (t->type == SMP_T_STR) + len = strlen((const char *)ts->key.key); + else + len = t->key_size; + + shard = stktable_calc_shard_num(t, ts->key.key, len); + + /* make the compiler happy when shard is not used without threads */ + ALREADY_CHECKED(shard); + __stksess_free(t, ts); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock); } /* @@ -115,17 +139,25 @@ void stksess_free(struct stktable *t, struct stksess *ts) */ int __stksess_kill(struct stktable *t, struct stksess *ts) { + int updt_locked = 0; + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) return 0; - eb32_delete(&ts->exp); if (ts->upd.node.leaf_p) { + updt_locked = 1; HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); - eb32_delete(&ts->upd); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) + goto out_unlock; } + eb32_delete(&ts->exp); + eb32_delete(&ts->upd); ebmb_delete(&ts->key); __stksess_free(t, ts); + + out_unlock: + if (updt_locked) + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); return 1; } @@ -136,14 +168,26 @@ int __stksess_kill(struct stktable *t, struct stksess *ts) */ int stksess_kill(struct stktable *t, struct stksess *ts, int decrefcnt) { + uint shard; + size_t len; int ret; if (decrefcnt && HA_ATOMIC_SUB_FETCH(&ts->ref_cnt, 1) != 0) return 0; - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + if (t->type == SMP_T_STR) + len = strlen((const char *)ts->key.key); + else + len = t->key_size; + + shard = stktable_calc_shard_num(t, ts->key.key, len); + + /* make the compiler happy when shard is not used without threads */ + ALREADY_CHECKED(shard); + + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); ret = __stksess_kill(t, ts); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); return ret; } @@ -203,6 +247,7 @@ static struct stksess *__stksess_init(struct stktable *t, struct stksess * ts) memset((void *)ts - t->data_size, 0, t->data_size); ts->ref_cnt = 0; ts->shard = 0; + ts->seen = 0; ts->key.node.leaf_p = NULL; ts->exp.node.leaf_p = NULL; ts->upd.node.leaf_p = NULL; @@ -215,100 +260,124 @@ static struct stksess *__stksess_init(struct stktable *t, struct stksess * ts) * Trash oldest <to_batch> sticky sessions from table <t> * Returns number of trashed sticky sessions. It may actually trash less * than expected if finding these requires too long a search time (e.g. - * most of them have ts->ref_cnt>0). + * most of them have ts->ref_cnt>0). This function locks the table. 
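Every free, kill and lookup above now derives a shard from the key before touching a lock. stktable_calc_shard_num() itself is not part of this hunk; given the XXH64-based hash_seed set up in stktable_init() further down, a plausible sketch of it (the hashing details are an assumption) is:

```c
#include <stdint.h>
#include <stddef.h>
#include <xxhash.h>

#define CONFIG_HAP_TBL_BUCKETS 16   /* assumed default shard count */

/* minimal stand-in for the one field this sketch needs */
struct stktable_sketch {
	uint64_t hash_seed;   /* XXH64(t->id, t->idlen, 0), as set in stktable_init() */
};

/* hypothetical reimplementation of stktable_calc_shard_num(): hash the key
 * with the per-table seed and fold the result onto one of the buckets */
static unsigned int calc_shard_num(const struct stktable_sketch *t,
                                   const void *key, size_t len)
{
	return XXH64(key, len, t->hash_seed) % CONFIG_HAP_TBL_BUCKETS;
}
```

Because the seed is derived from the table's id, two tables holding the same key do not necessarily place it in the same shard.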
*/ -int __stktable_trash_oldest(struct stktable *t, int to_batch) +int stktable_trash_oldest(struct stktable *t, int to_batch) { struct stksess *ts; struct eb32_node *eb; int max_search = to_batch * 2; // no more than 50% misses + int max_per_shard = (to_batch + CONFIG_HAP_TBL_BUCKETS - 1) / CONFIG_HAP_TBL_BUCKETS; + int done_per_shard; int batched = 0; - int looped = 0; + int updt_locked; + int looped; + int shard; - eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK); + shard = 0; while (batched < to_batch) { + done_per_shard = 0; + looped = 0; + updt_locked = 0; + + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + + eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK); + while (batched < to_batch && done_per_shard < max_per_shard) { + if (unlikely(!eb)) { + /* we might have reached the end of the tree, typically because + * <now_ms> is in the first half and we're first scanning the last + * half. Let's loop back to the beginning of the tree now if we + * have not yet visited it. + */ + if (looped) + break; + looped = 1; + eb = eb32_first(&t->shards[shard].exps); + if (likely(!eb)) + break; + } - if (unlikely(!eb)) { - /* we might have reached the end of the tree, typically because - * <now_ms> is in the first half and we're first scanning the last - * half. Let's loop back to the beginning of the tree now if we - * have not yet visited it. - */ - if (looped) - break; - looped = 1; - eb = eb32_first(&t->exps); - if (likely(!eb)) + if (--max_search < 0) break; - } - if (--max_search < 0) - break; + /* timer looks expired, detach it from the queue */ + ts = eb32_entry(eb, struct stksess, exp); + eb = eb32_next(eb); - /* timer looks expired, detach it from the queue */ - ts = eb32_entry(eb, struct stksess, exp); - eb = eb32_next(eb); + /* don't delete an entry which is currently referenced */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) + continue; - /* don't delete an entry which is currently referenced */ - if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) - continue; + eb32_delete(&ts->exp); - eb32_delete(&ts->exp); + if (ts->expire != ts->exp.key) { + if (!tick_isset(ts->expire)) + continue; - if (ts->expire != ts->exp.key) { - if (!tick_isset(ts->expire)) - continue; + ts->exp.key = ts->expire; + eb32_insert(&t->shards[shard].exps, &ts->exp); - ts->exp.key = ts->expire; - eb32_insert(&t->exps, &ts->exp); + /* the update might have jumped beyond the next element, + * possibly causing a wrapping. We need to check whether + * the next element should be used instead. If the next + * element doesn't exist it means we're on the right + * side and have to check the first one then. If it + * exists and is closer, we must use it, otherwise we + * use the current one. + */ + if (!eb) + eb = eb32_first(&t->shards[shard].exps); - /* the update might have jumped beyond the next element, - * possibly causing a wrapping. We need to check whether - * the next element should be used instead. If the next - * element doesn't exist it means we're on the right - * side and have to check the first one then. If it - * exists and is closer, we must use it, otherwise we - * use the current one. - */ - if (!eb) - eb = eb32_first(&t->exps); + if (!eb || tick_is_lt(ts->exp.key, eb->key)) + eb = &ts->exp; - if (!eb || tick_is_lt(ts->exp.key, eb->key)) - eb = &ts->exp; + continue; + } - continue; - } + /* if the entry is in the update list, we must be extremely careful + * because peers can see it at any moment and start to use it. 
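The rewritten stktable_trash_oldest() above spreads the eviction budget round-robin across the shards instead of draining a single expiry tree, and still caps total probing with max_search (twice the batch, i.e. no more than 50% misses). A worked example of the per-shard budget, assuming the default of 16 buckets:

```c
#include <stdio.h>

#define CONFIG_HAP_TBL_BUCKETS 16   /* assumed default */

int main(void)
{
	int to_batch = 10;   /* total entries we want to evict */

	/* same ceiling division as the patch, so every shard gets a share
	 * of the budget even when to_batch < number of buckets */
	int max_per_shard = (to_batch + CONFIG_HAP_TBL_BUCKETS - 1) / CONFIG_HAP_TBL_BUCKETS;

	printf("max_per_shard = %d\n", max_per_shard);   /* prints 1 */
	return 0;
}
```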
Peers + * will take the table's updt_lock for reading when doing that, and + * with that lock held, will grab a ref_cnt before releasing the + * lock. So we must take this lock as well and check the ref_cnt. + */ + if (ts->upd.node.leaf_p) { + if (!updt_locked) { + updt_locked = 1; + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + } + /* now we're locked, new peers can't grab it anymore, + * existing ones already have the ref_cnt. + */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) + continue; + } - /* session expired, trash it */ - ebmb_delete(&ts->key); - if (ts->upd.node.leaf_p) { - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + /* session expired, trash it */ + ebmb_delete(&ts->key); eb32_delete(&ts->upd); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + __stksess_free(t, ts); + batched++; + done_per_shard++; } - __stksess_free(t, ts); - batched++; - } - return batched; -} + if (updt_locked) + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); -/* - * Trash oldest <to_batch> sticky sessions from table <t> - * Returns number of trashed sticky sessions. - * This function locks the table - */ -int stktable_trash_oldest(struct stktable *t, int to_batch) -{ - int ret; + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - ret = __stktable_trash_oldest(t, to_batch); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + if (max_search <= 0) + break; - return ret; + shard = (shard + 1) % CONFIG_HAP_TBL_BUCKETS; + if (!shard) + break; + } + + return batched; } + /* * Allocate and initialise a new sticky session. * The new sticky session is returned or NULL in case of lack of memory. @@ -346,17 +415,17 @@ struct stksess *stksess_new(struct stktable *t, struct stktable_key *key) } /* - * Looks in table <t> for a sticky session matching key <key>. + * Looks in table <t> for a sticky session matching key <key> in shard <shard>. * Returns pointer on requested sticky session or NULL if none was found. */ -struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *key) +struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *key, uint shard) { struct ebmb_node *eb; if (t->type == SMP_T_STR) - eb = ebst_lookup_len(&t->keys, key->key, key->key_len+1 < t->key_size ? key->key_len : t->key_size-1); + eb = ebst_lookup_len(&t->shards[shard].keys, key->key, key->key_len + 1 < t->key_size ? key->key_len : t->key_size - 1); else - eb = ebmb_lookup(&t->keys, key->key, t->key_size); + eb = ebmb_lookup(&t->shards[shard].keys, key->key, t->key_size); if (unlikely(!eb)) { /* no session found */ @@ -375,12 +444,60 @@ struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *k struct stksess *stktable_lookup_key(struct stktable *t, struct stktable_key *key) { struct stksess *ts; + uint shard; + size_t len; + + if (t->type == SMP_T_STR) + len = key->key_len + 1 < t->key_size ? key->key_len : t->key_size - 1; + else + len = t->key_size; - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); - ts = __stktable_lookup_key(t, key); + shard = stktable_calc_shard_num(t, key->key, len); + + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + ts = __stktable_lookup_key(t, key, shard); if (ts) HA_ATOMIC_INC(&ts->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + + return ts; +} + +/* + * Looks in table <t> for a sticky session matching ptr <ptr>. + * Returns pointer on requested sticky session or NULL if none was found. 
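The comment above spells out the new deletion rule. Condensed into one hypothetical helper, the pattern that both stktable_trash_oldest() and process_table_expire() apply in this patch is:

```c
#include <haproxy/stick_table.h>

/* assumed helper, not in the patch: an entry queued in the updates tree
 * may be grabbed by a peer at any moment, so it may only be freed after
 * taking the updt_lock for writing and re-checking ref_cnt under it */
static int try_free_entry(struct stktable *t, struct stksess *ts, int *updt_locked)
{
	if (ts->upd.node.leaf_p) {                 /* still visible to peers */
		if (!*updt_locked) {
			*updt_locked = 1;
			HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
		}
		/* locked: new peers can't grab it; existing ones hold a ref_cnt */
		if (HA_ATOMIC_LOAD(&ts->ref_cnt))
			return 0;                  /* raced with a peer, keep it */
	}
	ebmb_delete(&ts->key);
	eb32_delete(&ts->upd);
	__stksess_free(t, ts);
	return 1;
}
```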
+ * The refcount of the found entry is increased and this function + * is protected using the table lock + */ +struct stksess *stktable_lookup_ptr(struct stktable *t, void *ptr) +{ + struct stksess *ts = NULL; + struct ebmb_node *eb; + int shard; + + for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) { + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + /* linear search is performed, this could be optimized by adding + * an eb node dedicated to ptr lookups into stksess struct to + * leverage eb_lookup function instead. + */ + eb = ebmb_first(&t->shards[shard].keys); + while (eb) { + struct stksess *cur; + + cur = ebmb_entry(eb, struct stksess, key); + if (cur == ptr) { + ts = cur; + break; + } + eb = ebmb_next(eb); + } + if (ts) + HA_ATOMIC_INC(&ts->ref_cnt); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + if (ts) + return ts; + } return ts; } @@ -389,14 +506,14 @@ struct stksess *stktable_lookup_key(struct stktable *t, struct stktable_key *key * Looks in table <t> for a sticky session with same key as <ts>. * Returns pointer on requested sticky session or NULL if none was found. */ -struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts) +struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts, uint shard) { struct ebmb_node *eb; if (t->type == SMP_T_STR) - eb = ebst_lookup(&(t->keys), (char *)ts->key.key); + eb = ebst_lookup(&t->shards[shard].keys, (char *)ts->key.key); else - eb = ebmb_lookup(&(t->keys), ts->key.key, t->key_size); + eb = ebmb_lookup(&t->shards[shard].keys, ts->key.key, t->key_size); if (unlikely(!eb)) return NULL; @@ -413,12 +530,21 @@ struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts) struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts) { struct stksess *lts; + uint shard; + size_t len; + + if (t->type == SMP_T_STR) + len = strlen((const char *)ts->key.key); + else + len = t->key_size; + + shard = stktable_calc_shard_num(t, ts->key.key, len); - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); - lts = __stktable_lookup(t, ts); + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + lts = __stktable_lookup(t, ts, shard); if (lts) HA_ATOMIC_INC(<s->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); return lts; } @@ -428,7 +554,7 @@ struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts) * The node will be also inserted into the update tree if needed, at a position * depending if the update is a local or coming from a remote node. * If <decrefcnt> is set, the ts entry's ref_cnt will be decremented. The table's - * write lock may be taken. + * updt_lock may be taken for writes. */ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, int expire, int decrefcnt) { @@ -444,39 +570,18 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, /* If sync is enabled */ if (t->sync_task) { - try_lock_again: - /* We'll need to reliably check that the entry is in the tree. - * It's only inserted/deleted using a write lock so a read lock - * is sufficient to verify this. We may then need to upgrade it - * to perform an update (which is rare under load), and if the - * upgrade fails, we'll try again with a write lock directly. 
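stktable_lookup_ptr() above is a deliberate O(n) scan over every shard, as its own comment notes, so it suits CLI debugging rather than the data path. The shard-aware lookups around it all repeat the same key-length rule before hashing; factored into a hypothetical helper it reads:

```c
#include <string.h>
#include <haproxy/stick_table.h>

/* hypothetical helper for the length rule the patch inlines at each call
 * site: string keys hash on their effective length, everything else on
 * the table's fixed key_size */
static size_t stksess_key_len(const struct stktable *t, const struct stksess *ts)
{
	if (t->type == SMP_T_STR)
		return strlen((const char *)ts->key.key);
	return t->key_size;
}
```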
- */ - if (use_wrlock) - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); - else - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->updt_lock); - if (local) { /* Check if this entry is not in the tree or not * scheduled for at least one peer. */ - if (!ts->upd.node.leaf_p - || (int)(t->commitupdate - ts->upd.key) >= 0 - || (int)(ts->upd.key - t->localupdate) >= 0) { - /* Time to upgrade the read lock to write lock if needed */ - if (!use_wrlock) { - if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &t->updt_lock) != 0) { - /* failed, try again */ - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - goto try_lock_again; - } - HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - } + if (!ts->upd.node.leaf_p || _HA_ATOMIC_LOAD(&ts->seen)) { + /* Time to upgrade the read lock to write lock */ + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + use_wrlock = 1; /* here we're write-locked */ + ts->seen = 0; ts->upd.key = ++t->update; t->localupdate = t->update; eb32_delete(&ts->upd); @@ -489,28 +594,30 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, do_wakeup = 1; } else { - /* If this entry is not in the tree */ - + /* Note: we land here when learning new entries from + * remote peers. We hold one ref_cnt so the entry + * cannot vanish under us, however if two peers create + * the same key at the exact same time, we must be + * careful not to perform two parallel inserts! Hence + * we need to first check leaf_p to know if the entry + * is new, then lock the tree and check the entry again + * (since another thread could have created it in the + * mean time). + */ if (!ts->upd.node.leaf_p) { /* Time to upgrade the read lock to write lock if needed */ - if (!use_wrlock) { - if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &t->updt_lock) != 0) { - /* failed, try again */ - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - goto try_lock_again; - } - HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - } + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + use_wrlock = 1; /* here we're write-locked */ - - ts->upd.key= (++t->update)+(2147483648U); - eb = eb32_insert(&t->updates, &ts->upd); - if (eb != &ts->upd) { - eb32_delete(eb); - eb32_insert(&t->updates, &ts->upd); + if (!ts->upd.node.leaf_p) { + ts->seen = 0; + ts->upd.key= (++t->update)+(2147483648U); + eb = eb32_insert(&t->updates, &ts->upd); + if (eb != &ts->upd) { + eb32_delete(eb); + eb32_insert(&t->updates, &ts->upd); + } } } } @@ -518,8 +625,6 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, /* drop the lock now */ if (use_wrlock) HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); - else - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock); } if (decrefcnt) @@ -569,14 +674,14 @@ static void stktable_release(struct stktable *t, struct stksess *ts) * is set. <ts> is returned if properly inserted, otherwise the one already * present if any. 
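stktable_touch_with_exp() above drops the old read-to-seek-to-write upgrade dance: now that the updt_lock no longer protects the element trees, taking a plain write lock is cheap enough once a lockless pre-check says work is needed. The pre-check, condensed (the peer side that sets ->seen is not visible in this hunk, so that half is inferred):

```c
#include <haproxy/stick_table.h>

/* hypothetical predicate mirroring the check above: requeue a local
 * update only when the entry is absent from the updates tree, or a
 * peer already saw the previously queued version */
static inline int needs_requeue(struct stksess *ts)
{
	return !ts->upd.node.leaf_p || _HA_ATOMIC_LOAD(&ts->seen);
}
```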
*/ -struct stksess *__stktable_store(struct stktable *t, struct stksess *ts) +struct stksess *__stktable_store(struct stktable *t, struct stksess *ts, uint shard) { struct ebmb_node *eb; - eb = ebmb_insert(&t->keys, &ts->key, t->key_size); + eb = ebmb_insert(&t->shards[shard].keys, &ts->key, t->key_size); if (likely(eb == &ts->key)) { ts->exp.key = ts->expire; - eb32_insert(&t->exps, &ts->exp); + eb32_insert(&t->shards[shard].exps, &ts->exp); } return ebmb_entry(eb, struct stksess, key); // most commonly this is <ts> } @@ -621,11 +726,24 @@ void stktable_requeue_exp(struct stktable *t, const struct stksess *ts) struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key *key) { struct stksess *ts, *ts2; + uint shard; + size_t len; if (!key) return NULL; - ts = stktable_lookup_key(table, key); + if (table->type == SMP_T_STR) + len = key->key_len + 1 < table->key_size ? key->key_len : table->key_size - 1; + else + len = table->key_size; + + shard = stktable_calc_shard_num(table, key->key, len); + + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); + ts = __stktable_lookup_key(table, key, shard); + if (ts) + HA_ATOMIC_INC(&ts->ref_cnt); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); if (ts) return ts; @@ -645,12 +763,12 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key * * one we find. */ - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); - ts2 = __stktable_store(table, ts); + ts2 = __stktable_store(table, ts, shard); HA_ATOMIC_INC(&ts2->ref_cnt); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); if (unlikely(ts2 != ts)) { /* another entry was added in the mean time, let's @@ -671,12 +789,21 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key * struct stksess *stktable_set_entry(struct stktable *table, struct stksess *nts) { struct stksess *ts; + uint shard; + size_t len; + + if (table->type == SMP_T_STR) + len = strlen((const char *)nts->key.key); + else + len = table->key_size; + + shard = stktable_calc_shard_num(table, nts->key.key, len); - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->lock); - ts = __stktable_lookup(table, nts); + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); + ts = __stktable_lookup(table, nts, shard); if (ts) { HA_ATOMIC_INC(&ts->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); return ts; } ts = nts; @@ -684,18 +811,18 @@ struct stksess *stktable_set_entry(struct stktable *table, struct stksess *nts) /* let's increment it before switching to exclusive */ HA_ATOMIC_INC(&ts->ref_cnt); - if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &table->lock) != 0) { + if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &table->shards[shard].sh_lock) != 0) { /* upgrade to seek lock failed, let's drop and take */ - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->lock); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); } else - HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &table->shards[shard].sh_lock); /* now we're write-locked */ - __stktable_store(table, ts); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock); + __stktable_store(table, ts, shard); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, 
&table->shards[shard].sh_lock); stktable_requeue_exp(table, ts); return ts; @@ -710,87 +837,117 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int struct stktable *t = context; struct stksess *ts; struct eb32_node *eb; - int updt_locked = 0; - int looped = 0; + int updt_locked; + int looped; int exp_next; + int task_exp; + int shard; + + task_exp = TICK_ETERNITY; + + for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) { + updt_locked = 0; + looped = 0; + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK); + + while (1) { + if (unlikely(!eb)) { + /* we might have reached the end of the tree, typically because + * <now_ms> is in the first half and we're first scanning the last + * half. Let's loop back to the beginning of the tree now if we + * have not yet visited it. + */ + if (looped) + break; + looped = 1; + eb = eb32_first(&t->shards[shard].exps); + if (likely(!eb)) + break; + } - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK); - - while (1) { - if (unlikely(!eb)) { - /* we might have reached the end of the tree, typically because - * <now_ms> is in the first half and we're first scanning the last - * half. Let's loop back to the beginning of the tree now if we - * have not yet visited it. - */ - if (looped) - break; - looped = 1; - eb = eb32_first(&t->exps); - if (likely(!eb)) - break; - } - - if (likely(tick_is_lt(now_ms, eb->key))) { - /* timer not expired yet, revisit it later */ - exp_next = eb->key; - goto out_unlock; - } + if (likely(tick_is_lt(now_ms, eb->key))) { + /* timer not expired yet, revisit it later */ + exp_next = eb->key; + goto out_unlock; + } - /* timer looks expired, detach it from the queue */ - ts = eb32_entry(eb, struct stksess, exp); - eb = eb32_next(eb); + /* timer looks expired, detach it from the queue */ + ts = eb32_entry(eb, struct stksess, exp); + eb = eb32_next(eb); - /* don't delete an entry which is currently referenced */ - if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) - continue; + /* don't delete an entry which is currently referenced */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) + continue; - eb32_delete(&ts->exp); + eb32_delete(&ts->exp); - if (!tick_is_expired(ts->expire, now_ms)) { - if (!tick_isset(ts->expire)) - continue; + if (!tick_is_expired(ts->expire, now_ms)) { + if (!tick_isset(ts->expire)) + continue; - ts->exp.key = ts->expire; - eb32_insert(&t->exps, &ts->exp); + ts->exp.key = ts->expire; + eb32_insert(&t->shards[shard].exps, &ts->exp); - /* the update might have jumped beyond the next element, - * possibly causing a wrapping. We need to check whether - * the next element should be used instead. If the next - * element doesn't exist it means we're on the right - * side and have to check the first one then. If it - * exists and is closer, we must use it, otherwise we - * use the current one. - */ - if (!eb) - eb = eb32_first(&t->exps); + /* the update might have jumped beyond the next element, + * possibly causing a wrapping. We need to check whether + * the next element should be used instead. If the next + * element doesn't exist it means we're on the right + * side and have to check the first one then. If it + * exists and is closer, we must use it, otherwise we + * use the current one. 
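All of these loops iterate over CONFIG_HAP_TBL_BUCKETS, a compile-time constant. Assuming it follows HAProxy's usual overridable-default pattern, it would be declared roughly as follows and could be changed at build time (e.g. DEFINE="-DCONFIG_HAP_TBL_BUCKETS=64"):

```c
/* assumed shape of the default, not part of this diff */
#ifndef CONFIG_HAP_TBL_BUCKETS
#define CONFIG_HAP_TBL_BUCKETS 16
#endif
```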
+ */ + if (!eb) + eb = eb32_first(&t->shards[shard].exps); - if (!eb || tick_is_lt(ts->exp.key, eb->key)) - eb = &ts->exp; - continue; - } + if (!eb || tick_is_lt(ts->exp.key, eb->key)) + eb = &ts->exp; + continue; + } - /* session expired, trash it */ - ebmb_delete(&ts->key); - if (ts->upd.node.leaf_p) { - if (!updt_locked) { - updt_locked = 1; - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + /* if the entry is in the update list, we must be extremely careful + * because peers can see it at any moment and start to use it. Peers + * will take the table's updt_lock for reading when doing that, and + * with that lock held, will grab a ref_cnt before releasing the + * lock. So we must take this lock as well and check the ref_cnt. + */ + if (ts->upd.node.leaf_p) { + if (!updt_locked) { + updt_locked = 1; + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + } + /* now we're locked, new peers can't grab it anymore, + * existing ones already have the ref_cnt. + */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) + continue; } + + /* session expired, trash it */ + ebmb_delete(&ts->key); eb32_delete(&ts->upd); + __stksess_free(t, ts); } - __stksess_free(t, ts); - } - /* We have found no task to expire in any tree */ - exp_next = TICK_ETERNITY; + /* We have found no task to expire in any tree */ + exp_next = TICK_ETERNITY; -out_unlock: - task->expire = exp_next; - if (updt_locked) - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + out_unlock: + if (updt_locked) + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + + task_exp = tick_first(task_exp, exp_next); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + } + + /* Reset the task's expiration. We do this under the lock so as not + * to ruin a call to task_queue() in stktable_requeue_exp() if we + * were to update with TICK_ETERNITY. + */ + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + task->expire = task_exp; HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + return task; } @@ -803,12 +960,17 @@ out_unlock: int stktable_init(struct stktable *t, char **err_msg) { int peers_retval = 0; + int shard; t->hash_seed = XXH64(t->id, t->idlen, 0); if (t->size) { - t->keys = EB_ROOT_UNIQUE; - memset(&t->exps, 0, sizeof(t->exps)); + for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) { + t->shards[shard].keys = EB_ROOT_UNIQUE; + memset(&t->shards[shard].exps, 0, sizeof(t->shards[shard].exps)); + HA_RWLOCK_INIT(&t->shards[shard].sh_lock); + } + t->updates = EB_ROOT_UNIQUE; HA_RWLOCK_INIT(&t->lock); @@ -1402,6 +1564,8 @@ struct stktable_data_type stktable_data_types[STKTABLE_DATA_TYPES] = { [STKTABLE_DT_GPT] = { .name = "gpt", .std_type = STD_T_UINT, .is_array = 1, .as_is = 1 }, [STKTABLE_DT_GPC] = { .name = "gpc", .std_type = STD_T_UINT, .is_array = 1 }, [STKTABLE_DT_GPC_RATE] = { .name = "gpc_rate", .std_type = STD_T_FRQP, .is_array = 1, .arg_type = ARG_T_DELAY }, + [STKTABLE_DT_GLITCH_CNT] = { .name = "glitch_cnt", .std_type = STD_T_UINT }, + [STKTABLE_DT_GLITCH_RATE] = { .name = "glitch_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY }, }; /* Registers stick-table extra data type with index <idx>, name <name>, type @@ -1741,6 +1905,79 @@ static int sample_conv_table_bytes_out_rate(const struct arg *arg_p, struct samp return !!ptr; } +/* Casts sample <smp> to the type of the table specified in arg(0), and looks + * it up into this table. Returns the cumulated number of front glitches for the + * key if the key is present in the table, otherwise zero, so that comparisons + * can be easily performed. 
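The new glitch_rate cell registered above is a freq counter (STD_T_FRQP) whose measuring period is the ARG_T_DELAY argument kept in the table's data_arg. Reading it back, which is what the converters below do, looks like this sketch (extracted from the converter code, usual HAProxy headers assumed):

```c
#include <haproxy/freq_ctr.h>
#include <haproxy/stick_table.h>

/* sketch: turn an entry's glitch_rate cell into events per period */
static unsigned int read_glitch_rate(struct stktable *t, struct stksess *ts)
{
	void *ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GLITCH_RATE);

	if (!ptr)
		return 0;   /* rate not stored in this table */
	return read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
	                            t->data_arg[STKTABLE_DT_GLITCH_RATE].u);
}
```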
If the inspected parameter is not stored in the + * table, <not found> is returned. + */ +static int sample_conv_table_glitch_cnt(const struct arg *arg_p, struct sample *smp, void *private) +{ + struct stktable *t; + struct stktable_key *key; + struct stksess *ts; + void *ptr; + + t = arg_p[0].data.t; + + key = smp_to_stkey(smp, t); + if (!key) + return 0; + + ts = stktable_lookup_key(t, key); + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + + if (!ts) /* key not present */ + return 1; + + ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GLITCH_CNT); + if (ptr) + smp->data.u.sint = stktable_data_cast(ptr, std_t_uint); + + stktable_release(t, ts); + return !!ptr; +} + +/* Casts sample <smp> to the type of the table specified in arg(0), and looks + * it up into this table. Returns the front glitch rate the key if the key is + * present in the table, otherwise zero, so that comparisons can be easily + * performed. If the inspected parameter is not stored in the table, <not found> + * is returned. + */ +static int sample_conv_table_glitch_rate(const struct arg *arg_p, struct sample *smp, void *private) +{ + struct stktable *t; + struct stktable_key *key; + struct stksess *ts; + void *ptr; + + t = arg_p[0].data.t; + + key = smp_to_stkey(smp, t); + if (!key) + return 0; + + ts = stktable_lookup_key(t, key); + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + + if (!ts) /* key not present */ + return 1; + + ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GLITCH_RATE); + if (ptr) + smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp), + t->data_arg[STKTABLE_DT_GLITCH_RATE].u); + + stktable_release(t, ts); + return !!ptr; +} + /* Casts sample <smp> to the type of the table specified in arg_p(1), and looks * it up into this table. Returns the value of the GPT[arg_p(0)] tag for the key * if the key is present in the table, otherwise false, so that comparisons can @@ -4218,6 +4455,85 @@ smp_fetch_sc_conn_cur(const struct arg *args, struct sample *smp, const char *kw return 1; } +/* set <smp> to the cumulated number of glitches from the stream or session's + * tracked frontend counters. Supports being called as "sc[0-9]_glitch_cnt" or + * "src_glitch_cnt" only. + */ +static int +smp_fetch_sc_glitch_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct stkctr tmpstkctr; + struct stkctr *stkctr; + + stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr); + if (!stkctr) + return 0; + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + if (stkctr_entry(stkctr) != NULL) { + void *ptr; + + ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GLITCH_CNT); + if (!ptr) { + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + return 0; /* parameter not stored */ + } + + HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + smp->data.u.sint = stktable_data_cast(ptr, std_t_uint); + + HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + } + return 1; +} + +/* set <smp> to the rate of glitches from the stream or session's tracked + * frontend counters. Supports being called as "sc[0-9]_glitch_rate" or + * "src_glitch_rate" only. 
+ */ +static int +smp_fetch_sc_glitch_rate(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct stkctr tmpstkctr; + struct stkctr *stkctr; + + stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr); + if (!stkctr) + return 0; + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + if (stkctr_entry(stkctr) != NULL) { + void *ptr; + + ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GLITCH_RATE); + if (!ptr) { + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + return 0; /* parameter not stored */ + } + + HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp), + stkctr->table->data_arg[STKTABLE_DT_GLITCH_RATE].u); + + HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + } + return 1; +} + /* set <smp> to the cumulated number of streams from the stream's tracked * frontend counters. Supports being called as "sc[0-9]_sess_cnt" or * "src_sess_cnt" only. @@ -4885,6 +5201,7 @@ struct show_table_ctx { void *target; /* table we want to dump, or NULL for all */ struct stktable *t; /* table being currently dumped (first if NULL) */ struct stksess *entry; /* last entry we were trying to dump (or first if NULL) */ + int tree_head; /* tree head currently being visited */ long long value[STKTABLE_FILTER_LEN]; /* value to compare against */ signed char data_type[STKTABLE_FILTER_LEN]; /* type of data to compare, or -1 if none */ signed char data_op[STKTABLE_FILTER_LEN]; /* operator (STD_OP_*) when data_type set */ @@ -4896,39 +5213,22 @@ struct show_table_ctx { char action; /* action on the table : one of STK_CLI_ACT_* */ }; -/* Processes a single table entry matching a specific key passed in argument. - * returns 0 if wants to be called again, 1 if has ended processing. +/* Processes a single table entry <ts>. + * returns 0 if it wants to be called again, 1 if has ended processing. 
*/ -static int table_process_entry_per_key(struct appctx *appctx, char **args) +static int table_process_entry(struct appctx *appctx, struct stksess *ts, char **args) { struct show_table_ctx *ctx = appctx->svcctx; struct stktable *t = ctx->target; - struct stksess *ts; - struct sample key; long long value; int data_type; int cur_arg; void *ptr; struct freq_ctr *frqp; - if (!*args[4]) - return cli_err(appctx, "Key value expected\n"); - - memset(&key, 0, sizeof(key)); - key.data.type = SMP_T_STR; - key.data.u.str.area = args[4]; - key.data.u.str.data = strlen(args[4]); - switch (t->type) { case SMP_T_IPV4: case SMP_T_IPV6: - /* prefer input format over table type when parsing ip addresses, - * then let smp_to_stkey() do the conversion for us when needed - */ - BUG_ON(!sample_casts[key.data.type][SMP_T_ADDR]); - if (!sample_casts[key.data.type][SMP_T_ADDR](&key)) - return cli_err(appctx, "Invalid key\n"); - break; case SMP_T_SINT: case SMP_T_STR: break; @@ -4945,21 +5245,15 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) } } - /* try to convert key according to table type - * (it will fill static_table_key on success) - */ - if (!smp_to_stkey(&key, t)) - return cli_err(appctx, "Invalid key\n"); - /* check permissions */ if (!cli_has_level(appctx, ACCESS_LVL_OPER)) return 1; + if (!ts) + return 1; + switch (ctx->action) { case STK_CLI_ACT_SHOW: - ts = stktable_lookup_key(t, &static_table_key); - if (!ts) - return 1; chunk_reset(&trash); if (!table_dump_head_to_buffer(&trash, appctx, t, t)) { stktable_release(t, ts); @@ -4976,10 +5270,6 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) break; case STK_CLI_ACT_CLR: - ts = stktable_lookup_key(t, &static_table_key); - if (!ts) - return 1; - if (!stksess_kill(t, ts, 1)) { /* don't delete an entry which is currently referenced */ return cli_err(appctx, "Entry currently in use, cannot remove\n"); @@ -4987,11 +5277,6 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) break; case STK_CLI_ACT_SET: - ts = stktable_get_entry(t, &static_table_key); - if (!ts) { - /* don't delete an entry which is currently referenced */ - return cli_err(appctx, "Unable to allocate a new entry\n"); - } HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); for (cur_arg = 5; *args[cur_arg]; cur_arg += 2) { if (strncmp(args[cur_arg], "data.", 5) != 0) { @@ -5023,7 +5308,7 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) return 1; } - ptr = stktable_data_ptr(t, ts, data_type); + ptr = __stktable_data_ptr(t, ts, data_type); switch (stktable_data_types[data_type].std_type) { case STD_T_SINT: @@ -5060,6 +5345,82 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) return cli_err(appctx, "Unknown action\n"); } return 1; + +} + +/* Processes a single table entry matching a specific key passed in argument. + * returns 0 if wants to be called again, 1 if has ended processing. 
+ */ +static int table_process_entry_per_key(struct appctx *appctx, char **args) +{ + struct show_table_ctx *ctx = appctx->svcctx; + struct stktable *t = ctx->target; + struct stksess *ts; + struct sample key; + + if (!*args[4]) + return cli_err(appctx, "Key value expected\n"); + + memset(&key, 0, sizeof(key)); + key.data.type = SMP_T_STR; + key.data.u.str.area = args[4]; + key.data.u.str.data = strlen(args[4]); + + switch (t->type) { + case SMP_T_IPV4: + case SMP_T_IPV6: + /* prefer input format over table type when parsing ip addresses, + * then let smp_to_stkey() do the conversion for us when needed + */ + BUG_ON(!sample_casts[key.data.type][SMP_T_ADDR]); + if (!sample_casts[key.data.type][SMP_T_ADDR](&key)) + return cli_err(appctx, "Invalid key\n"); + break; + default: + /* nothing to do */ + break; + } + + /* try to convert key according to table type + * (it will fill static_table_key on success) + */ + if (!smp_to_stkey(&key, t)) + return cli_err(appctx, "Invalid key\n"); + + if (ctx->action == STK_CLI_ACT_SET) { + ts = stktable_get_entry(t, &static_table_key); + if (!ts) + return cli_err(appctx, "Unable to allocate a new entry\n"); + } else + ts = stktable_lookup_key(t, &static_table_key); + + return table_process_entry(appctx, ts, args); +} + +/* Processes a single table entry matching a specific ptr passed in argument. + * returns 0 if wants to be called again, 1 if has ended processing. + */ +static int table_process_entry_per_ptr(struct appctx *appctx, char **args) +{ + struct show_table_ctx *ctx = appctx->svcctx; + struct stktable *t = ctx->target; + ulong ptr; + char *error; + struct stksess *ts; + + if (!*args[4] || args[4][0] != '0' || args[4][1] != 'x') + return cli_err(appctx, "Pointer expected (0xffff notation)\n"); + + /* Convert argument to integer value */ + ptr = strtoul(args[4], &error, 16); + if (*error != '\0') + return cli_err(appctx, "Malformed ptr.\n"); + + ts = stktable_lookup_ptr(t, (void *)ptr); + if (!ts) + return cli_err(appctx, "No entry can be found matching ptr.\n"); + + return table_process_entry(appctx, ts, args); } /* Prepares the appctx fields with the data-based filters from the command line. 
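table_process_entry_per_ptr() above accepts only a 0x-prefixed hex value that strtoul() consumes entirely. A standalone demonstration of that validation:

```c
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	const char *arg = argc > 1 ? argv[1] : "0xdeadbeef";
	char *error;
	unsigned long ptr;

	if (arg[0] != '0' || arg[1] != 'x') {
		fprintf(stderr, "Pointer expected (0xffff notation)\n");
		return 1;
	}
	ptr = strtoul(arg, &error, 16);
	if (*error != '\0') {   /* trailing garbage after the hex digits */
		fprintf(stderr, "Malformed ptr.\n");
		return 1;
	}
	printf("looking up entry at %#lx\n", ptr);
	return 0;
}
```

On the CLI this backs e.g. "show table <name> ptr 0x7f...", which is handy when a key is not printable; since the lookup is a linear scan, it is a debugging aid rather than a fast path.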
@@ -5127,6 +5488,8 @@ static int cli_parse_table_req(char **args, char *payload, struct appctx *appctx if (strcmp(args[3], "key") == 0) return table_process_entry_per_key(appctx, args); + if (strcmp(args[3], "ptr") == 0) + return table_process_entry_per_ptr(appctx, args); else if (strncmp(args[3], "data.", 5) == 0) return table_prepare_data_request(appctx, args); else if (*args[3]) @@ -5137,11 +5500,11 @@ static int cli_parse_table_req(char **args, char *payload, struct appctx *appctx err_args: switch (ctx->action) { case STK_CLI_ACT_SHOW: - return cli_err(appctx, "Optional argument only supports \"data.<store_data_type>\" <operator> <value> and key <key>\n"); + return cli_err(appctx, "Optional argument only supports \"data.<store_data_type>\" <operator> <value> or key <key> or ptr <ptr>\n"); case STK_CLI_ACT_CLR: - return cli_err(appctx, "Required arguments: <table> \"data.<store_data_type>\" <operator> <value> or <table> key <key>\n"); + return cli_err(appctx, "Required arguments: <table> \"data.<store_data_type>\" <operator> <value> or <table> key <key> or <table> ptr <ptr>\n"); case STK_CLI_ACT_SET: - return cli_err(appctx, "Required arguments: <table> key <key> [data.<store_data_type> <value>]*\n"); + return cli_err(appctx, "Required arguments: <table> key <key> [data.<store_data_type> <value>]* or <table> ptr <ptr> [data.<store_data_type> <value>]*\n"); default: return cli_err(appctx, "Unknown action\n"); } @@ -5159,6 +5522,7 @@ static int cli_io_handler_table(struct appctx *appctx) struct ebmb_node *eb; int skip_entry; int show = ctx->action == STK_CLI_ACT_SHOW; + int shard = ctx->tree_head; /* * We have 3 possible states in ctx->state : @@ -5170,14 +5534,6 @@ static int cli_io_handler_table(struct appctx *appctx) * - STATE_DONE : nothing left to dump, the buffer may contain some * data though. 
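cli_io_handler_table() below must be restartable whenever the output buffer fills, and sharding adds one more dimension to resume from; that is what the new tree_head context field tracks. The advance logic, condensed into a hypothetical helper:

```c
#include <haproxy/stick_table.h>

/* assumed helper condensing how the dumper advances: visit the next
 * shard of the current table, and only step to the next table once
 * every shard has been walked (the shard index persists across appctx
 * wakeups, which is what makes the dump resumable) */
static struct stktable *advance_dump(struct stktable *t, int *tree_head)
{
	if (++*tree_head >= CONFIG_HAP_TBL_BUCKETS) {
		*tree_head = 0;
		t = t->next;    /* fall through to the next table */
	}
	return t;
}
```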
*/ - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) { - /* in case of abort, remove any refcount we might have set on an entry */ - if (ctx->state == STATE_DUMP) { - stksess_kill_if_expired(ctx->t, ctx->entry, 1); - } - return 1; - } chunk_reset(&trash); @@ -5192,22 +5548,30 @@ static int cli_io_handler_table(struct appctx *appctx) } if (ctx->t->size) { - if (show && !table_dump_head_to_buffer(&trash, appctx, ctx->t, ctx->target)) + if (show && !shard && !table_dump_head_to_buffer(&trash, appctx, ctx->t, ctx->target)) return 0; if (ctx->target && (strm_li(s)->bind_conf->level & ACCESS_LVL_MASK) >= ACCESS_LVL_OPER) { /* dump entries only if table explicitly requested */ - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->lock); - eb = ebmb_first(&ctx->t->keys); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); + eb = ebmb_first(&ctx->t->shards[shard].keys); if (eb) { ctx->entry = ebmb_entry(eb, struct stksess, key); HA_ATOMIC_INC(&ctx->entry->ref_cnt); ctx->state = STATE_DUMP; - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); break; } - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); + + /* we come here if we didn't find any entry in this shard */ + shard = ++ctx->tree_head; + if (shard < CONFIG_HAP_TBL_BUCKETS) + break; // try again on new shard + + /* fall through next table */ + shard = ctx->tree_head = 0; } } ctx->t = ctx->t->next; @@ -5275,7 +5639,7 @@ static int cli_io_handler_table(struct appctx *appctx) HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ctx->entry->lock); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); HA_ATOMIC_DEC(&ctx->entry->ref_cnt); eb = ebmb_next(&ctx->entry->key); @@ -5287,7 +5651,7 @@ static int cli_io_handler_table(struct appctx *appctx) else if (!skip_entry && !ctx->entry->ref_cnt) __stksess_kill(ctx->t, old); HA_ATOMIC_INC(&ctx->entry->ref_cnt); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); break; } @@ -5297,9 +5661,13 @@ static int cli_io_handler_table(struct appctx *appctx) else if (!skip_entry && !HA_ATOMIC_LOAD(&ctx->entry->ref_cnt)) __stksess_kill(ctx->t, ctx->entry); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); - ctx->t = ctx->t->next; + shard = ++ctx->tree_head; + if (shard >= CONFIG_HAP_TBL_BUCKETS) { + shard = ctx->tree_head = 0; + ctx->t = ctx->t->next; + } ctx->state = STATE_NEXT; break; @@ -5481,6 +5849,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc_get_gpc", smp_fetch_sc_get_gpc, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_get_gpc0", smp_fetch_sc_get_gpc0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_get_gpc1", smp_fetch_sc_get_gpc1, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN }, + { "sc_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc_glitch_rate", smp_fetch_sc_glitch_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_gpc_rate", smp_fetch_sc_gpc_rate, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, 
@@ -5509,6 +5879,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc0_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc0_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc0_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, @@ -5536,6 +5908,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc1_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc1_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc1_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, @@ -5562,6 +5936,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc2_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc2_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc2_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, @@ -5591,6 +5967,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "src_get_gpc", smp_fetch_sc_get_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, + { "src_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, + { "src_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_gpc_rate", smp_fetch_sc_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, @@ -5632,6 +6010,8 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "table_gpc_rate", sample_conv_table_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_gpc0_rate", 
sample_conv_table_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_gpc1_rate", sample_conv_table_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, + { "table_glitch_cnt", sample_conv_table_glitch_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, + { "table_glitch_rate", sample_conv_table_glitch_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_http_err_cnt", sample_conv_table_http_err_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_http_err_rate", sample_conv_table_http_err_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_http_fail_cnt", sample_conv_table_http_fail_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, @@ -5656,3 +6036,73 @@ static struct cfg_kw_list cfg_kws = {{ },{ }}; INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); + + +#if defined(USE_PROMEX) + +static int stk_promex_metric_info(unsigned int id, struct promex_metric *metric, struct ist *desc) +{ + switch (id) { + case STICKTABLE_SIZE: + *metric = (struct promex_metric){ .n = ist("size"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC }; + *desc = ist("Stick table size."); + break; + case STICKTABLE_USED: + *metric = (struct promex_metric){ .n = ist("used"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC }; + *desc = ist("Number of entries used in this stick table."); + break; + default: + return -1; + } + return 1; +} + +static void *stk_promex_start_ts(void *unused, unsigned int id) +{ + return stktables_list; +} + +static void *stk_promex_next_ts(void *unused, void *metric_ctx, unsigned int id) +{ + struct stktable *t = metric_ctx; + + return t->next; +} + +static int stk_promex_fill_ts(void *unused, void *metric_ctx, unsigned int id, struct promex_label *labels, struct field *field) +{ + struct stktable *t = metric_ctx; + + if (!t->size) + return 0; + + labels[0].name = ist("name"); + labels[0].value = ist(t->id); + labels[1].name = ist("type"); + labels[1].value = ist(stktable_types[t->type].kw); + + switch (id) { + case STICKTABLE_SIZE: + *field = mkf_u32(FN_GAUGE, t->size); + break; + case STICKTABLE_USED: + *field = mkf_u32(FN_GAUGE, t->current); + break; + default: + return -1; + } + return 1; +} + +static struct promex_module promex_sticktable_module = { + .name = IST("sticktable"), + .metric_info = stk_promex_metric_info, + .start_ts = stk_promex_start_ts, + .next_ts = stk_promex_next_ts, + .fill_ts = stk_promex_fill_ts, + .nb_metrics = STICKTABLE_TOTAL_FIELDS, +}; + +INITCALL1(STG_REGISTER, promex_register_module, &promex_sticktable_module); + +#endif |
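With USE_PROMEX defined, the module registered above exposes two per-table gauges. The metric prefix is applied by the Prometheus exporter and is not visible in this hunk; assuming its usual haproxy_<module>_<name> scheme, a scrape would plausibly contain:

```
# both gauges carry the table's name and key type as labels
haproxy_sticktable_size{name="st_src_global",type="ip"} 100000
haproxy_sticktable_used{name="st_src_global",type="ip"} 42
```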