summaryrefslogtreecommitdiffstats
path: root/src/util/binhash.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/util/binhash.c447
1 files changed, 447 insertions, 0 deletions
diff --git a/src/util/binhash.c b/src/util/binhash.c
new file mode 100644
index 0000000..fd9bc87
--- /dev/null
+++ b/src/util/binhash.c
@@ -0,0 +1,447 @@
+/*++
+/* NAME
+/* binhash 3
+/* SUMMARY
+/* hash table manager
+/* SYNOPSIS
+/* #include <binhash.h>
+/*
+/* typedef struct {
+/* .in +4
+/* void *key;
+/* ssize_t key_len;
+/* void *value;
+/* /* private fields... */
+/* .in -4
+/* } BINHASH_INFO;
+/*
+/* BINHASH *binhash_create(size)
+/* ssize_t size;
+/*
+/* BINHASH_INFO *binhash_enter(table, key, key_len, value)
+/* BINHASH *table;
+/* const void *key;
+/* ssize_t key_len;
+/* void *value;
+/*
+/* char *binhash_find(table, key, key_len)
+/* BINHASH *table;
+/* const void *key;
+/* ssize_t key_len;
+/*
+/* BINHASH_INFO *binhash_locate(table, key, key_len)
+/* BINHASH *table;
+/* const void *key;
+/* ssize_t key_len;
+/*
+/* void binhash_delete(table, key, key_len, free_fn)
+/* BINHASH *table;
+/* const void *key;
+/* ssize_t key_len;
+/* void (*free_fn)(void *);
+/*
+/* void binhash_free(table, free_fn)
+/* BINHASH *table;
+/* void (*free_fn)(void *);
+/*
+/* void binhash_walk(table, action, ptr)
+/* BINHASH *table;
+/* void (*action)(BINHASH_INFO *info, void *ptr);
+/* void *ptr;
+/*
+/* BINHASH_INFO **binhash_list(table)
+/* BINHASH *table;
+/*
+/* BINHASH_INFO *binhash_sequence(table, how)
+/* BINHASH *table;
+/* int how;
+/* DESCRIPTION
+/* This module maintains one or more hash tables. Each table entry
+/* consists of a unique binary-valued lookup key and a generic
+/* character-pointer value.
+/* The tables are automatically resized when they fill up. When the
+/* values to be remembered are not character pointers, proper casts
+/* should be used or the code will not be portable.
+/*
+/* binhash_create() creates a table of the specified size and returns a
+/* pointer to the result. The lookup keys are saved with mymemdup().
+/*
+/* binhash_enter() stores a (key, value) pair into the specified table
+/* and returns a pointer to the resulting entry. The code does not
+/* check if an entry with that key already exists: use binhash_locate()
+/* for updating an existing entry. The key is copied; the value is not.
+/*
+/* binhash_find() returns the value that was stored under the given key,
+/* or a null pointer if it was not found. In order to distinguish
+/* a null value from a non-existent value, use binhash_locate().
+/*
+/* binhash_locate() returns a pointer to the entry that was stored
+/* for the given key, or a null pointer if it was not found.
+/*
+/* binhash_delete() removes one entry that was stored under the given key.
+/* If the free_fn argument is not a null pointer, the corresponding
+/* function is called with as argument the value that was stored under
+/* the key.
+/*
+/* binhash_free() destroys a hash table, including contents. If the free_fn
+/* argument is not a null pointer, the corresponding function is called
+/* for each table entry, with as argument the value that was stored
+/* with the entry.
+/*
+/* binhash_walk() invokes the action function for each table entry, with
+/* a pointer to the entry as its argument. The ptr argument is passed
+/* on to the action function.
+/*
+/* binhash_list() returns a null-terminated list of pointers to
+/* all elements in the named table. The list should be passed to
+/* myfree().
+/*
+/* binhash_sequence() returns the first or next element
+/* depending on the value of the "how" argument. Specify
+/* BINHASH_SEQ_FIRST to start a new sequence, BINHASH_SEQ_NEXT
+/* to continue, and BINHASH_SEQ_STOP to terminate a sequence
+/* early. The caller must not delete an element before it is
+/* visited.
+/* RESTRICTIONS
+/* A callback function should not modify the hash table that is
+/* specified to its caller.
+/* DIAGNOSTICS
+/* The following conditions are reported and cause the program to
+/* terminate immediately: memory allocation failure; an attempt
+/* to delete a non-existent entry.
+/* SEE ALSO
+/* mymalloc(3) memory management wrapper
+/* hash_fnv(3) Fowler/Noll/Vo hash function
+/* LICENSE
+/* .ad
+/* .fi
+/* The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/* Wietse Venema
+/* IBM T.J. Watson Research
+/* P.O. Box 704
+/* Yorktown Heights, NY 10598, USA
+/*
+/* Wietse Venema
+/* Google, Inc.
+/* 111 8th Avenue
+/* New York, NY 10011, USA
+/*--*/
+
+/* C library */
+
+#include <sys_defs.h>
+#include <string.h>
+
+/* Local stuff */
+
+#include "mymalloc.h"
+#include "msg.h"
+#include "binhash.h"
+
+/* binhash_hash - hash a string */
+
+#ifndef NO_HASH_FNV
+#include "hash_fnv.h"
+
+#define binhash_hash(key, len, size) (hash_fnv((key), (len)) % (size))
+
+#else
+
+static size_t binhash_hash(const void *key, ssize_t len, size_t size)
+{
+ size_t h = 0;
+ size_t g;
+
+ /*
+ * From the "Dragon" book by Aho, Sethi and Ullman.
+ */
+
+ while (len-- > 0) {
+ h = (h << 4U) + *(unsigned const char *) key++;
+ if ((g = (h & 0xf0000000)) != 0) {
+ h ^= (g >> 24U);
+ h ^= g;
+ }
+ }
+ return (h % size);
+}
+
+#endif
+
+/* binhash_link - insert element into table */
+
+#define binhash_link(table, elm) { \
+ BINHASH_INFO **_h = table->data + binhash_hash(elm->key, elm->key_len, table->size);\
+ elm->prev = 0; \
+ if ((elm->next = *_h) != 0) \
+ (*_h)->prev = elm; \
+ *_h = elm; \
+ table->used++; \
+}
+
+/* binhash_size - allocate and initialize hash table */
+
+static void binhash_size(BINHASH *table, size_t size)
+{
+ BINHASH_INFO **h;
+
+ size |= 1;
+
+ table->data = h = (BINHASH_INFO **) mymalloc(size * sizeof(BINHASH_INFO *));
+ table->size = size;
+ table->used = 0;
+
+ while (size-- > 0)
+ *h++ = 0;
+}
+
+/* binhash_create - create initial hash table */
+
+BINHASH *binhash_create(ssize_t size)
+{
+ BINHASH *table;
+
+ table = (BINHASH *) mymalloc(sizeof(BINHASH));
+ binhash_size(table, size < 13 ? 13 : size);
+ table->seq_bucket = table->seq_element = 0;
+ return (table);
+}
+
+/* binhash_grow - extend existing table */
+
+static void binhash_grow(BINHASH *table)
+{
+ BINHASH_INFO *ht;
+ BINHASH_INFO *next;
+ ssize_t old_size = table->size;
+ BINHASH_INFO **h = table->data;
+ BINHASH_INFO **old_entries = h;
+
+ binhash_size(table, 2 * old_size);
+
+ while (old_size-- > 0) {
+ for (ht = *h++; ht; ht = next) {
+ next = ht->next;
+ binhash_link(table, ht);
+ }
+ }
+ myfree((void *) old_entries);
+}
+
+/* binhash_enter - enter (key, value) pair */
+
+BINHASH_INFO *binhash_enter(BINHASH *table, const void *key, ssize_t key_len, void *value)
+{
+ BINHASH_INFO *ht;
+
+ if (table->used >= table->size)
+ binhash_grow(table);
+ ht = (BINHASH_INFO *) mymalloc(sizeof(BINHASH_INFO));
+ ht->key = mymemdup(key, key_len);
+ ht->key_len = key_len;
+ ht->value = value;
+ binhash_link(table, ht);
+ return (ht);
+}
+
+/* binhash_find - lookup value */
+
+void *binhash_find(BINHASH *table, const void *key, ssize_t key_len)
+{
+ BINHASH_INFO *ht;
+
+#define KEY_EQ(x,y,l) (((unsigned char *) x)[0] == ((unsigned char *) y)[0] && memcmp(x,y,l) == 0)
+
+ if (table != 0)
+ for (ht = table->data[binhash_hash(key, key_len, table->size)]; ht; ht = ht->next)
+ if (key_len == ht->key_len && KEY_EQ(key, ht->key, key_len))
+ return (ht->value);
+ return (0);
+}
+
+/* binhash_locate - lookup entry */
+
+BINHASH_INFO *binhash_locate(BINHASH *table, const void *key, ssize_t key_len)
+{
+ BINHASH_INFO *ht;
+
+ if (table != 0)
+ for (ht = table->data[binhash_hash(key, key_len, table->size)]; ht; ht = ht->next)
+ if (key_len == ht->key_len && KEY_EQ(key, ht->key, key_len))
+ return (ht);
+ return (0);
+}
+
+/* binhash_delete - delete one entry */
+
+void binhash_delete(BINHASH *table, const void *key, ssize_t key_len, void (*free_fn) (void *))
+{
+ if (table != 0) {
+ BINHASH_INFO *ht;
+ BINHASH_INFO **h = table->data + binhash_hash(key, key_len, table->size);
+
+ for (ht = *h; ht; ht = ht->next) {
+ if (key_len == ht->key_len && KEY_EQ(key, ht->key, key_len)) {
+ if (ht->next)
+ ht->next->prev = ht->prev;
+ if (ht->prev)
+ ht->prev->next = ht->next;
+ else
+ *h = ht->next;
+ table->used--;
+ myfree(ht->key);
+ if (free_fn)
+ (*free_fn) (ht->value);
+ myfree((void *) ht);
+ return;
+ }
+ }
+ msg_panic("binhash_delete: unknown_key: \"%s\"", (char *) key);
+ }
+}
+
+/* binhash_free - destroy hash table */
+
+void binhash_free(BINHASH *table, void (*free_fn) (void *))
+{
+ if (table != 0) {
+ ssize_t i = table->size;
+ BINHASH_INFO *ht;
+ BINHASH_INFO *next;
+ BINHASH_INFO **h = table->data;
+
+ while (i-- > 0) {
+ for (ht = *h++; ht; ht = next) {
+ next = ht->next;
+ myfree(ht->key);
+ if (free_fn)
+ (*free_fn) (ht->value);
+ myfree((void *) ht);
+ }
+ }
+ myfree((void *) table->data);
+ table->data = 0;
+ if (table->seq_bucket)
+ myfree((void *) table->seq_bucket);
+ table->seq_bucket = 0;
+ myfree((void *) table);
+ }
+}
+
+/* binhash_walk - iterate over hash table */
+
+void binhash_walk(BINHASH *table, void (*action) (BINHASH_INFO *, void *),
+ void *ptr) {
+ if (table != 0) {
+ ssize_t i = table->size;
+ BINHASH_INFO **h = table->data;
+ BINHASH_INFO *ht;
+
+ while (i-- > 0)
+ for (ht = *h++; ht; ht = ht->next)
+ (*action) (ht, ptr);
+ }
+}
+
+/* binhash_list - list all table members */
+
+BINHASH_INFO **binhash_list(table)
+BINHASH *table;
+{
+ BINHASH_INFO **list;
+ BINHASH_INFO *member;
+ ssize_t count = 0;
+ ssize_t i;
+
+ if (table != 0) {
+ list = (BINHASH_INFO **) mymalloc(sizeof(*list) * (table->used + 1));
+ for (i = 0; i < table->size; i++)
+ for (member = table->data[i]; member != 0; member = member->next)
+ list[count++] = member;
+ } else {
+ list = (BINHASH_INFO **) mymalloc(sizeof(*list));
+ }
+ list[count] = 0;
+ return (list);
+}
+
+/* binhash_sequence - dict(3) compatibility iterator */
+
+BINHASH_INFO *binhash_sequence(BINHASH *table, int how)
+{
+ if (table == 0)
+ return (0);
+
+ switch (how) {
+ case BINHASH_SEQ_FIRST: /* start new sequence */
+ if (table->seq_bucket)
+ myfree((void *) table->seq_bucket);
+ table->seq_bucket = binhash_list(table);
+ table->seq_element = table->seq_bucket;
+ return (*(table->seq_element)++);
+ case BINHASH_SEQ_NEXT: /* next element */
+ if (table->seq_element && *table->seq_element)
+ return (*(table->seq_element)++);
+ /* FALLTHROUGH */
+ default: /* terminate sequence */
+ if (table->seq_bucket) {
+ myfree((void *) table->seq_bucket);
+ table->seq_bucket = table->seq_element = 0;
+ }
+ return (0);
+ }
+}
+
+#ifdef TEST
+#include <vstring_vstream.h>
+#include <myrand.h>
+
+int main(int unused_argc, char **unused_argv)
+{
+ VSTRING *buf = vstring_alloc(10);
+ ssize_t count = 0;
+ BINHASH *hash;
+ BINHASH_INFO **ht_info;
+ BINHASH_INFO **ht;
+ BINHASH_INFO *info;
+ ssize_t i;
+ ssize_t r;
+ int op;
+
+ /*
+ * Load a large number of strings including terminator, and delete them
+ * in a random order.
+ */
+ hash = binhash_create(10);
+ while (vstring_get(buf, VSTREAM_IN) != VSTREAM_EOF)
+ binhash_enter(hash, vstring_str(buf), VSTRING_LEN(buf) + 1,
+ CAST_INT_TO_VOID_PTR(count++));
+ if (count != hash->used)
+ msg_panic("%ld entries stored, but %lu entries exist",
+ (long) count, (unsigned long) hash->used);
+ for (i = 0, op = BINHASH_SEQ_FIRST; (info = binhash_sequence(hash, op)) != 0;
+ i++, op = BINHASH_SEQ_NEXT)
+ if (memchr(info->key, 0, info->key_len) == 0)
+ msg_panic("no null byte in lookup key");
+ if (i != hash->used)
+ msg_panic("%ld entries found, but %lu entries exist",
+ (long) i, (unsigned long) hash->used);
+ ht_info = binhash_list(hash);
+ for (i = 0; i < hash->used; i++) {
+ r = myrand() % hash->used;
+ info = ht_info[i];
+ ht_info[i] = ht_info[r];
+ ht_info[r] = info;
+ }
+ for (ht = ht_info; *ht; ht++)
+ binhash_delete(hash, ht[0]->key, ht[0]->key_len, (void (*) (void *)) 0);
+ if (hash->used > 0)
+ msg_panic("%ld entries not deleted", (long) hash->used);
+ myfree((void *) ht_info);
+ binhash_free(hash, (void (*) (void *)) 0);
+ vstring_free(buf);
+ return (0);
+}
+
+#endif