summaryrefslogtreecommitdiffstats
path: root/src/libutil/hash.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/libutil/hash.c716
1 files changed, 716 insertions, 0 deletions
diff --git a/src/libutil/hash.c b/src/libutil/hash.c
new file mode 100644
index 0000000..d2af88c
--- /dev/null
+++ b/src/libutil/hash.c
@@ -0,0 +1,716 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "hash.h"
+#include "util.h"
+#include "khash.h"
+
+/**
+ * LRU hashing
+ */
+
+static const guint log_base = 10;
+static const guint eviction_candidates = 16;
+static const gdouble lfu_base_value = 5.0;
+
+struct rspamd_lru_volatile_element_s;
+
+struct rspamd_lru_hash_s {
+ guint maxsize;
+ guint eviction_min_prio;
+ guint eviction_used;
+ struct rspamd_lru_element_s **eviction_pool;
+
+ GDestroyNotify value_destroy;
+ GDestroyNotify key_destroy;
+ GHashFunc hfunc;
+ GEqualFunc eqfunc;
+
+ khint_t n_buckets, size, n_occupied, upper_bound;
+ khint32_t *flags;
+ gpointer *keys;
+ struct rspamd_lru_volatile_element_s *vals;
+};
+
+enum rspamd_lru_element_flags {
+ RSPAMD_LRU_ELEMENT_NORMAL = 0,
+ RSPAMD_LRU_ELEMENT_VOLATILE = (1 << 0),
+ RSPAMD_LRU_ELEMENT_IMMORTAL = (1 << 1),
+};
+
+struct rspamd_lru_element_s {
+ guint16 last;
+ guint8 lg_usages;
+ guint8 eviction_pos;
+ guint8 flags;
+ gpointer data;
+};
+
+struct rspamd_lru_volatile_element_s {
+ struct rspamd_lru_element_s e;
+ time_t creation_time;
+ time_t ttl;
+};
+typedef struct rspamd_lru_volatile_element_s rspamd_lru_vol_element_t;
+
+#define TIME_TO_TS(t) ((guint16) (((t) / 60) & 0xFFFFU))
+
+static rspamd_lru_vol_element_t *
+rspamd_lru_hash_get(const rspamd_lru_hash_t *h, gconstpointer key)
+{
+ if (h->n_buckets) {
+ khint_t k, i, last, mask, step = 0;
+ mask = h->n_buckets - 1;
+ k = h->hfunc(key);
+ i = k & mask;
+ last = i;
+
+ while (!__ac_isempty(h->flags, i) &&
+ (__ac_isdel(h->flags, i) || !h->eqfunc(h->keys[i], key))) {
+ i = (i + (++step)) & mask;
+ if (i == last) {
+ return NULL;
+ }
+ }
+
+ return __ac_iseither(h->flags, i) ? NULL : &h->vals[i];
+ }
+
+ return NULL;
+}
+
+static int
+rspamd_lru_hash_resize(rspamd_lru_hash_t *h,
+ khint_t new_n_buckets)
+{
+ /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */
+ khint32_t *new_flags = 0;
+ khint_t j = 1;
+
+ kroundup32(new_n_buckets);
+ if (new_n_buckets < 4) {
+ new_n_buckets = 4;
+ }
+
+ if (h->size >= (khint_t) (new_n_buckets * __ac_HASH_UPPER + 0.5)) {
+ j = 0;
+ /* requested size is too small */
+ }
+ else {
+ /* hash table size to be changed (shrink or expand); rehash */
+ new_flags = (khint32_t *) g_malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t));
+
+ if (!new_flags) {
+ return -1;
+ }
+
+ memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t));
+ if (h->n_buckets < new_n_buckets) {
+ /* expand */
+ gpointer *new_keys = (gpointer *) g_realloc((void *) h->keys,
+ new_n_buckets * sizeof(gpointer));
+
+ if (!new_keys) {
+ g_free(new_flags);
+ return -1;
+ }
+
+ h->keys = new_keys;
+ rspamd_lru_vol_element_t *new_vals =
+ (rspamd_lru_vol_element_t *) g_realloc((void *) h->vals,
+ new_n_buckets * sizeof(rspamd_lru_vol_element_t));
+ if (!new_vals) {
+ g_free(new_flags);
+ return -1;
+ }
+
+ h->vals = new_vals;
+ }
+ /* Shrink */
+ }
+
+ if (j) {
+ /* rehashing is needed */
+ h->eviction_used = 0;
+
+ for (j = 0; j != h->n_buckets; ++j) {
+ if (__ac_iseither(h->flags, j) == 0) {
+ gpointer key = h->keys[j];
+ rspamd_lru_vol_element_t val;
+ khint_t new_mask;
+ new_mask = new_n_buckets - 1;
+ val = h->vals[j];
+ val.e.eviction_pos = (guint8) -1;
+ __ac_set_isdel_true(h->flags, j);
+
+ while (1) { /* kick-out process; sort of like in Cuckoo hashing */
+ khint_t k, i, step = 0;
+ k = h->hfunc(key);
+ i = k & new_mask;
+
+ while (!__ac_isempty(new_flags, i)) {
+ i = (i + (++step)) & new_mask;
+ }
+
+ __ac_set_isempty_false(new_flags, i);
+
+ if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) {
+ /* kick out the existing element */
+ {
+ gpointer tmp = h->keys[i];
+ h->keys[i] = key;
+ key = tmp;
+ }
+ {
+ rspamd_lru_vol_element_t tmp = h->vals[i];
+ h->vals[i] = val;
+ val = tmp;
+ val.e.eviction_pos = (guint8) -1;
+ }
+ __ac_set_isdel_true(h->flags, i);
+ /* mark it as deleted in the old hash table */
+ }
+ else { /* write the element and jump out of the loop */
+ h->keys[i] = key;
+ h->vals[i] = val;
+ break;
+ }
+ }
+ }
+ }
+
+ if (h->n_buckets > new_n_buckets) {
+ /* shrink the hash table */
+ h->keys = (gpointer *) g_realloc((void *) h->keys,
+ new_n_buckets * sizeof(gpointer));
+ h->vals = (rspamd_lru_vol_element_t *) g_realloc((void *) h->vals,
+ new_n_buckets * sizeof(rspamd_lru_vol_element_t));
+ }
+
+ g_free(h->flags); /* free the working space */
+ h->flags = new_flags;
+ h->n_buckets = new_n_buckets;
+ h->n_occupied = h->size;
+ h->upper_bound = (khint_t) (h->n_buckets * __ac_HASH_UPPER + 0.5);
+ }
+
+ return 0;
+}
+
+static rspamd_lru_vol_element_t *
+rspamd_lru_hash_put(rspamd_lru_hash_t *h, gpointer key, int *ret)
+{
+ khint_t x;
+
+ if (h->n_occupied >= h->upper_bound) {
+ /* update the hash table */
+ if (h->n_buckets > (h->size << 1)) {
+ if (rspamd_lru_hash_resize(h, h->n_buckets - 1) < 0) {
+ /* clear "deleted" elements */
+ *ret = -1;
+ return NULL;
+ }
+ }
+ else if (rspamd_lru_hash_resize(h, h->n_buckets + 1) < 0) {
+ /* expand the hash table */
+ *ret = -1;
+ return NULL;
+ }
+ }
+
+ khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0;
+ x = site = h->n_buckets;
+ k = h->hfunc(key);
+ i = k & mask;
+
+ if (__ac_isempty(h->flags, i)) {
+ x = i; /* for speed up */
+ }
+ else {
+ last = i;
+ while (!__ac_isempty(h->flags, i) &&
+ (__ac_isdel(h->flags, i) ||
+ !h->eqfunc(h->keys[i], key))) {
+ if (__ac_isdel(h->flags, i)) {
+ site = i;
+ }
+
+ i = (i + (++step)) & mask;
+
+ if (i == last) {
+ x = site;
+ break;
+ }
+ }
+
+ if (x == h->n_buckets) {
+ if (__ac_isempty(h->flags, i) && site != h->n_buckets) {
+ x = site;
+ }
+ else {
+ x = i;
+ }
+ }
+ }
+
+ if (__ac_isempty(h->flags, x)) { /* not present at all */
+ h->keys[x] = key;
+ __ac_set_isboth_false(h->flags, x);
+ ++h->size;
+ ++h->n_occupied;
+ *ret = 1;
+ }
+ else if (__ac_isdel(h->flags, x)) { /* deleted */
+ h->keys[x] = key;
+ __ac_set_isboth_false(h->flags, x);
+ ++h->size;
+ *ret = 2;
+ }
+ else {
+ /* Don't touch h->keys[x] if present and not deleted */
+ *ret = 0;
+ }
+
+ return &h->vals[x];
+}
+
+static void
+rspamd_lru_hash_del(rspamd_lru_hash_t *h, rspamd_lru_vol_element_t *elt)
+{
+ khint_t x = elt - h->vals;
+
+ if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {
+ __ac_set_isdel_true(h->flags, x);
+ --h->size;
+
+ if (h->key_destroy) {
+ h->key_destroy(h->keys[x]);
+ }
+
+ if (h->value_destroy) {
+ h->value_destroy(elt->e.data);
+ }
+ }
+}
+
+static void
+rspamd_lru_hash_remove_evicted(rspamd_lru_hash_t *hash,
+ rspamd_lru_element_t *elt)
+{
+ guint i;
+ rspamd_lru_element_t *cur;
+
+ g_assert(hash->eviction_used > 0);
+ g_assert(elt->eviction_pos < hash->eviction_used);
+
+ memmove(&hash->eviction_pool[elt->eviction_pos],
+ &hash->eviction_pool[elt->eviction_pos + 1],
+ sizeof(rspamd_lru_element_t *) *
+ (eviction_candidates - elt->eviction_pos - 1));
+
+ hash->eviction_used--;
+
+ if (hash->eviction_used > 0) {
+ /* We also need to update min_prio and renumber eviction list */
+ hash->eviction_min_prio = G_MAXUINT;
+
+ for (i = 0; i < hash->eviction_used; i++) {
+ cur = hash->eviction_pool[i];
+
+ if (hash->eviction_min_prio > cur->lg_usages) {
+ hash->eviction_min_prio = cur->lg_usages;
+ }
+
+ cur->eviction_pos = i;
+ }
+ }
+ else {
+ hash->eviction_min_prio = G_MAXUINT;
+ }
+}
+
+static void
+rspamd_lru_hash_update_counter(rspamd_lru_element_t *elt)
+{
+ guint8 counter = elt->lg_usages;
+
+ if (counter != 255) {
+ double r, baseval, p;
+
+ r = rspamd_random_double_fast();
+ baseval = counter - lfu_base_value;
+
+ if (baseval < 0) {
+ baseval = 0;
+ }
+
+ p = 1.0 / (baseval * log_base + 1);
+
+ if (r < p) {
+ elt->lg_usages++;
+ }
+ }
+}
+
+static inline void
+rspamd_lru_hash_decrease_counter(rspamd_lru_element_t *elt, time_t now)
+{
+ if (now - elt->last > lfu_base_value) {
+ /* Penalise counters for outdated records */
+ elt->lg_usages /= 2;
+ }
+}
+
+static gboolean
+rspamd_lru_hash_maybe_evict(rspamd_lru_hash_t *hash,
+ rspamd_lru_element_t *elt)
+{
+ guint i;
+ rspamd_lru_element_t *cur;
+
+ if (elt->eviction_pos == (guint8) -1) {
+ if (hash->eviction_used < eviction_candidates) {
+ /* There are free places in eviction pool */
+ hash->eviction_pool[hash->eviction_used] = elt;
+ elt->eviction_pos = hash->eviction_used;
+ hash->eviction_used++;
+
+ if (hash->eviction_min_prio > elt->lg_usages) {
+ hash->eviction_min_prio = elt->lg_usages;
+ }
+
+ return TRUE;
+ }
+ else {
+ /* Find any candidate that has higher usage count */
+ for (i = 0; i < hash->eviction_used; i++) {
+ cur = hash->eviction_pool[i];
+
+ if (cur->lg_usages > elt->lg_usages) {
+ cur->eviction_pos = -1;
+ elt->eviction_pos = i;
+ hash->eviction_pool[i] = elt;
+
+ if (hash->eviction_min_prio > elt->lg_usages) {
+ hash->eviction_min_prio = elt->lg_usages;
+ }
+
+ return TRUE;
+ }
+ }
+ }
+ }
+ else {
+ /* Already in the eviction list */
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static void
+rspamd_lru_hash_remove_node(rspamd_lru_hash_t *hash, rspamd_lru_element_t *elt)
+{
+ if (elt->eviction_pos != (guint8) -1) {
+ rspamd_lru_hash_remove_evicted(hash, elt);
+ }
+
+ rspamd_lru_hash_del(hash, (rspamd_lru_vol_element_t *) elt);
+}
+
+static void
+rspamd_lru_hash_evict(rspamd_lru_hash_t *hash, time_t now)
+{
+ double r;
+ guint i;
+ rspamd_lru_element_t *elt = NULL;
+ guint nexpired = 0;
+
+ /*
+ * We either evict one node from the eviction list
+ * or, at some probability scan all table and update eviction
+ * list first
+ */
+ r = rspamd_random_double_fast();
+
+ if (r < ((double) eviction_candidates) / hash->maxsize) {
+ /* Full hash scan */
+ rspamd_lru_vol_element_t *cur;
+ rspamd_lru_element_t *selected = NULL;
+
+ kh_foreach_value_ptr(hash, cur, {
+ rspamd_lru_element_t *node = &cur->e;
+
+ if (node->flags & RSPAMD_LRU_ELEMENT_IMMORTAL) {
+ continue;
+ }
+
+ if (node->flags & RSPAMD_LRU_ELEMENT_VOLATILE) {
+ /* If element is expired, just remove it */
+ if (now - cur->creation_time > cur->ttl) {
+ rspamd_lru_hash_remove_node(hash, node);
+
+ nexpired++;
+ continue;
+ }
+ }
+ else {
+ rspamd_lru_hash_decrease_counter(node, now);
+
+ if (rspamd_lru_hash_maybe_evict(hash, node)) {
+ if (selected && node->lg_usages < selected->lg_usages) {
+ selected = node;
+ }
+ else if (selected == NULL) {
+ selected = node;
+ }
+ }
+ }
+ });
+
+ if (selected) {
+ elt = selected;
+ }
+ }
+ else {
+ /* Fast random eviction */
+ for (i = 0; i < hash->eviction_used; i++) {
+ elt = hash->eviction_pool[i];
+
+ if (elt->lg_usages <= hash->eviction_min_prio) {
+ break;
+ }
+ }
+ }
+
+ /* Evict if nothing else has been cleaned */
+ if (elt && nexpired == 0) {
+ rspamd_lru_hash_remove_node(hash, elt);
+ }
+}
+
+rspamd_lru_hash_t *
+rspamd_lru_hash_new_full(gint maxsize,
+ GDestroyNotify key_destroy,
+ GDestroyNotify value_destroy,
+ GHashFunc hf,
+ GEqualFunc cmpf)
+{
+ rspamd_lru_hash_t *h;
+
+ if (maxsize < eviction_candidates * 2) {
+ maxsize = eviction_candidates * 2;
+ }
+
+ h = g_malloc0(sizeof(rspamd_lru_hash_t));
+ h->hfunc = hf;
+ h->eqfunc = cmpf;
+ h->eviction_pool = g_malloc0(sizeof(rspamd_lru_element_t *) *
+ eviction_candidates);
+ h->maxsize = maxsize;
+ h->value_destroy = value_destroy;
+ h->key_destroy = key_destroy;
+ h->eviction_min_prio = G_MAXUINT;
+
+ /* Preallocate some elements */
+ rspamd_lru_hash_resize(h, MIN(h->maxsize, 128));
+
+ return h;
+}
+
+rspamd_lru_hash_t *
+rspamd_lru_hash_new(gint maxsize,
+ GDestroyNotify key_destroy,
+ GDestroyNotify value_destroy)
+{
+ return rspamd_lru_hash_new_full(maxsize,
+ key_destroy, value_destroy,
+ rspamd_strcase_hash, rspamd_strcase_equal);
+}
+
+gpointer
+rspamd_lru_hash_lookup(rspamd_lru_hash_t *hash, gconstpointer key, time_t now)
+{
+ rspamd_lru_element_t *res;
+ rspamd_lru_vol_element_t *vnode;
+
+ vnode = rspamd_lru_hash_get(hash, (gpointer) key);
+ if (vnode != NULL) {
+ res = &vnode->e;
+
+ if (res->flags & RSPAMD_LRU_ELEMENT_VOLATILE) {
+ /* Check ttl */
+
+ if (now - vnode->creation_time > vnode->ttl) {
+ rspamd_lru_hash_remove_node(hash, res);
+
+ return NULL;
+ }
+ }
+
+ now = TIME_TO_TS(now);
+ res->last = MAX(res->last, now);
+ rspamd_lru_hash_update_counter(res);
+ rspamd_lru_hash_maybe_evict(hash, res);
+
+ return res->data;
+ }
+
+ return NULL;
+}
+
+gboolean
+rspamd_lru_hash_remove(rspamd_lru_hash_t *hash,
+ gconstpointer key)
+{
+ rspamd_lru_vol_element_t *res;
+
+ res = rspamd_lru_hash_get(hash, key);
+
+ if (res != NULL) {
+ rspamd_lru_hash_remove_node(hash, &res->e);
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void rspamd_lru_hash_insert(rspamd_lru_hash_t *hash,
+ gpointer key,
+ gpointer value,
+ time_t now,
+ guint ttl)
+{
+ rspamd_lru_element_t *node;
+ rspamd_lru_vol_element_t *vnode;
+ gint ret;
+
+ vnode = rspamd_lru_hash_put(hash, key, &ret);
+ node = &vnode->e;
+
+ if (ret == 0) {
+ /* Existing element, be careful about destructors */
+ if (hash->value_destroy) {
+ /* Remove old data */
+ hash->value_destroy(vnode->e.data);
+ }
+
+ if (hash->key_destroy) {
+ /* Here are dragons! */
+ goffset off = vnode - hash->vals;
+
+ hash->key_destroy(hash->keys[off]);
+ hash->keys[off] = key;
+ }
+ }
+
+
+ if (ttl == 0) {
+ node->flags = RSPAMD_LRU_ELEMENT_NORMAL;
+ }
+ else {
+ vnode->creation_time = now;
+ vnode->ttl = ttl;
+ node->flags = RSPAMD_LRU_ELEMENT_VOLATILE;
+ }
+
+ node->data = value;
+ node->lg_usages = (guint8) lfu_base_value;
+ node->last = TIME_TO_TS(now);
+ node->eviction_pos = (guint8) -1;
+
+ if (ret != 0) {
+ /* Also need to check maxsize */
+ if (kh_size(hash) >= hash->maxsize) {
+ node->flags |= RSPAMD_LRU_ELEMENT_IMMORTAL;
+ rspamd_lru_hash_evict(hash, now);
+ node->flags &= ~RSPAMD_LRU_ELEMENT_IMMORTAL;
+ }
+ }
+
+ rspamd_lru_hash_maybe_evict(hash, node);
+}
+
+void rspamd_lru_hash_destroy(rspamd_lru_hash_t *hash)
+{
+ if (hash) {
+ if (hash->key_destroy || hash->value_destroy) {
+ gpointer k;
+ rspamd_lru_vol_element_t cur;
+
+ kh_foreach(hash, k, cur, {
+ if (hash->key_destroy) {
+ hash->key_destroy(k);
+ }
+ if (hash->value_destroy) {
+ hash->value_destroy(cur.e.data);
+ }
+ });
+ }
+
+ g_free(hash->keys);
+ g_free(hash->vals);
+ g_free(hash->flags);
+ g_free(hash->eviction_pool);
+ g_free(hash);
+ }
+}
+
+gpointer
+rspamd_lru_hash_element_data(rspamd_lru_element_t *elt)
+{
+ return elt->data;
+}
+
+int rspamd_lru_hash_foreach(rspamd_lru_hash_t *h, int it, gpointer *k,
+ gpointer *v)
+{
+ gint i;
+ g_assert(it >= 0);
+
+ for (i = it; i != kh_end(h); ++i) {
+ if (!kh_exist(h, i)) {
+ continue;
+ }
+
+ *k = h->keys[i];
+ *v = h->vals[i].e.data;
+
+ break;
+ }
+
+ if (i == kh_end(h)) {
+ return -1;
+ }
+
+ return i + 1;
+}
+
+
+guint rspamd_lru_hash_size(rspamd_lru_hash_t *hash)
+{
+ return kh_size(hash);
+}
+
+/**
+ * Returns hash capacity
+ * @param hash hash object
+ */
+guint rspamd_lru_hash_capacity(rspamd_lru_hash_t *hash)
+{
+ return hash->maxsize;
+} \ No newline at end of file