summaryrefslogtreecommitdiffstats
path: root/third-party/tommyds/tommyhashlin.h
diff options
context:
space:
mode:
Diffstat (limited to 'third-party/tommyds/tommyhashlin.h')
-rw-r--r--third-party/tommyds/tommyhashlin.h350
1 files changed, 350 insertions, 0 deletions
diff --git a/third-party/tommyds/tommyhashlin.h b/third-party/tommyds/tommyhashlin.h
new file mode 100644
index 0000000..eaf59e6
--- /dev/null
+++ b/third-party/tommyds/tommyhashlin.h
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * Linear chained hashtable.
+ *
+ * This hashtable resizes dynamically and progressively using a variation of the
+ * linear hashing algorithm described in http://en.wikipedia.org/wiki/Linear_hashing
+ *
+ * It starts with the minimal size of 64 buckets, it doubles the size when it
+ * reaches a load factor greater than 0.5 and it halves the size with a load
+ * factor lower than 0.125.
+ *
+ * The progressive resize is good for real-time and interactive applications
+ * as it makes insert and delete operations always take the same time.
+ *
+ * For resizing, a dynamic array that supports access to non-contiguous
+ * segments is used.
+ * In this way we only allocate additional table segments on the heap, without
+ * freeing the previous table, and then not increasing the heap fragmentation.
+ *
+ * The resize takes place inside tommy_hashlin_insert() and tommy_hashlin_remove().
+ * No resize is done in the tommy_hashlin_search() operation.
+ *
+ * To initialize the hashtable you have to call tommy_hashlin_init().
+ *
+ * \code
+ * tommy_hashlin hashlin;
+ *
+ * tommy_hashlin_init(&hashlin);
+ * \endcode
+ *
+ * To insert elements in the hashtable you have to call tommy_hashlin_insert() for
+ * each element.
+ * In the insertion call you have to specify the address of the node, the
+ * address of the object, and the hash value of the key to use.
+ * The address of the object is used to initialize the tommy_node::data field
+ * of the node, and the hash to initialize the tommy_node::key field.
+ *
+ * \code
+ * struct object {
+ * int value;
+ * // other fields
+ * tommy_node node;
+ * };
+ *
+ * struct object* obj = malloc(sizeof(struct object)); // creates the object
+ *
+ * obj->value = ...; // initializes the object
+ *
+ * tommy_hashlin_insert(&hashlin, &obj->node, obj, tommy_inthash_u32(obj->value)); // inserts the object
+ * \endcode
+ *
+ * To find an element in the hashtable you have to call tommy_hashlin_search()
+ * providing a comparison function, its argument, and the hash of the key to search.
+ *
+ * \code
+ * int compare(const void* arg, const void* obj)
+ * {
+ * return *(const int*)arg != ((const struct object*)obj)->value;
+ * }
+ *
+ * int value_to_find = 1;
+ * struct object* obj = tommy_hashlin_search(&hashlin, compare, &value_to_find, tommy_inthash_u32(value_to_find));
+ * if (!obj) {
+ * // not found
+ * } else {
+ * // found
+ * }
+ * \endcode
+ *
+ * To iterate over all the elements in the hashtable with the same key, you have to
+ * use tommy_hashlin_bucket() and follow the tommy_node::next pointer until NULL.
+ * You have also to check explicitly for the key, as the bucket may contain
+ * different keys.
+ *
+ * \code
+ * int value_to_find = 1;
+ * tommy_node* i = tommy_hashlin_bucket(&hashlin, tommy_inthash_u32(value_to_find));
+ * while (i) {
+ * struct object* obj = i->data; // gets the object pointer
+ *
+ * if (obj->value == value_to_find) {
+ * printf("%d\n", obj->value); // process the object
+ * }
+ *
+ * i = i->next; // goes to the next element
+ * }
+ * \endcode
+ *
+ * To remove an element from the hashtable you have to call tommy_hashlin_remove()
+ * providing a comparison function, its argument, and the hash of the key to search
+ * and remove.
+ *
+ * \code
+ * struct object* obj = tommy_hashlin_remove(&hashlin, compare, &value_to_remove, tommy_inthash_u32(value_to_remove));
+ * if (obj) {
+ * free(obj); // frees the object allocated memory
+ * }
+ * \endcode
+ *
+ * To destroy the hashtable you have to remove all the elements, and deinitialize
+ * the hashtable calling tommy_hashlin_done().
+ *
+ * \code
+ * tommy_hashlin_done(&hashlin);
+ * \endcode
+ *
+ * If you need to iterate over all the elements in the hashtable, you can use
+ * tommy_hashlin_foreach() or tommy_hashlin_foreach_arg().
+ * If you need a more precise control with a real iteration, you have to insert
+ * all the elements also in a ::tommy_list, and use the list to iterate.
+ * See the \ref multiindex example for more detail.
+ */
+
+#ifndef __TOMMYHASHLIN_H
+#define __TOMMYHASHLIN_H
+
+#include "tommyhash.h"
+
+/******************************************************************************/
+/* hashlin */
+
+/** \internal
+ * Initial and minimal size of the hashtable expressed as a power of 2.
+ * The initial size is 2^TOMMY_HASHLIN_BIT.
+ */
+#define TOMMY_HASHLIN_BIT 6
+
+/**
+ * Hashtable node.
+ * This is the node that you have to include inside your objects.
+ */
+typedef tommy_node tommy_hashlin_node;
+
+/** \internal
+ * Max number of elements as a power of 2. It is also the number of segment pointers in tommy_hashlin_struct::bucket.
+ */
+#define TOMMY_HASHLIN_BIT_MAX 32
+
+/**
+ * Hashtable container type.
+ * \note Don't use internal fields directly, but access the container only using functions.
+ */
+typedef struct tommy_hashlin_struct {
+ tommy_hashlin_node** bucket[TOMMY_HASHLIN_BIT_MAX]; /**< Dynamic array of hash buckets, split in segments: position pos is read as bucket[ilog2(pos | 1)][pos]. One list for each hash modulus. */
+ tommy_uint_t bucket_bit; /**< Bits used in the bit mask. */
+ tommy_count_t bucket_max; /**< Number of buckets. */
+ tommy_count_t bucket_mask; /**< Bit mask to access the buckets. Applied to the hash when the low order position is below the split. */
+ tommy_count_t low_max; /**< Low order max value. */
+ tommy_count_t low_mask; /**< Low order mask value. Applied to the hash to get the low order position. */
+ tommy_count_t split; /**< Split position. Low order positions below it are addressed with bucket_mask instead of low_mask. */
+ tommy_count_t count; /**< Number of elements, as returned by tommy_hashlin_count(). */
+ tommy_uint_t state; /**< Reallocation state. */
+} tommy_hashlin;
+
+/**
+ * Initializes the hashtable.
+ */
+void tommy_hashlin_init(tommy_hashlin* hashlin);
+
+/**
+ * Deinitializes the hashtable.
+ *
+ * You can call this function with elements still contained,
+ * but such elements are not going to be freed by this call.
+ */
+void tommy_hashlin_done(tommy_hashlin* hashlin);
+
+/**
+ * Inserts an element in the hashtable. The data pointer initializes tommy_node::data and the hash initializes tommy_node::key of the given node.
+ */
+void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void* data, tommy_hash_t hash);
+
+/**
+ * Searches and removes an element from the hashtable.
+ * You have to provide a compare function and the hash of the element you want to remove.
+ * If the element is not found, 0 is returned.
+ * If more equal elements are present, the first one is removed.
+ * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
+ * The function should return 0 for equal elements, anything other for different elements.
+ * \param cmp_arg Compare argument passed as first argument of the compare function.
+ * \param hash Hash of the element to find and remove.
+ * \return The removed element, or 0 if not found.
+ */
+void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash);
+
+/** \internal
+ * Returns a pointer to the bucket head at the specified position, taken from the segment selected by the highest bit set of pos.
+ */
+tommy_inline tommy_hashlin_node** tommy_hashlin_pos(tommy_hashlin* hashlin, tommy_hash_t pos)
+{
+ tommy_uint_t bsr;
+
+ /* get the highest bit set; the "| 1" makes a pos of 0 select the same segment as a pos of 1 */
+ bsr = tommy_ilog2_u32(pos | 1);
+ /* NOTE(review): the segment is indexed with the full pos, not with an offset; presumably segment pointers are stored pre-biased - confirm in the implementation file */
+ return &hashlin->bucket[bsr][pos];
+}
+
+/** \internal
+ * Returns a pointer to the bucket of the specified hash: the position is hash & low_mask, or hash & bucket_mask when that low position is below the split.
+ */
+tommy_inline tommy_hashlin_node** tommy_hashlin_bucket_ref(tommy_hashlin* hashlin, tommy_hash_t hash)
+{
+ tommy_count_t pos;
+ tommy_count_t high_pos;
+
+ pos = hash & hashlin->low_mask;
+ high_pos = hash & hashlin->bucket_mask;
+
+ /* if this position is already allocated in the high half */
+ if (pos < hashlin->split) {
+ /* The following assignment is expected to be implemented */
+ /* with a conditional move instruction */
+ /* that results in a little better and constant performance */
+ /* regardless of the split position. */
+ /* This affects mostly the worst case, when the split value */
+ /* is near at its half, resulting in a totally unpredictable */
+ /* condition by the CPU. */
+ /* In such case the use of the conditional move is generally faster. */
+
+ /* use also the high bit */
+ pos = high_pos;
+ }
+
+ return tommy_hashlin_pos(hashlin, pos);
+}
+
+/**
+ * Gets the bucket of the specified hash.
+ * The bucket is guaranteed to contain ALL the elements with the specified hash,
+ * but it can also contain elements with other hashes, so check each element while iterating.
+ * You can access elements in the bucket following the tommy_node::next pointer until 0.
+ * \param hash Hash of the element to find.
+ * \return The head of the bucket, or 0 if empty.
+ */
+tommy_inline tommy_hashlin_node* tommy_hashlin_bucket(tommy_hashlin* hashlin, tommy_hash_t hash)
+{
+ return *tommy_hashlin_bucket_ref(hashlin, hash);
+}
+
+/**
+ * Searches an element in the hashtable.
+ * You have to provide a compare function and the hash of the element you want to find.
+ * If more equal elements are present, the first one is returned.
+ * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
+ * The function should return 0 for equal elements, anything other for different elements.
+ * \param cmp_arg Compare argument passed as first argument of the compare function.
+ * \param hash Hash of the element to find.
+ * \return The tommy_node::data field of the first element found, or 0 if none.
+ */
+tommy_inline void* tommy_hashlin_search(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash)
+{
+ tommy_hashlin_node* i = tommy_hashlin_bucket(hashlin, hash);
+
+ while (i) {
+ /* we first check if the hash matches, as the same bucket may hold multiple hash values; cmp is called only on a hash match */
+ if (i->key == hash && cmp(cmp_arg, i->data) == 0)
+ return i->data;
+ i = i->next;
+ }
+ return 0;
+}
+
+/**
+ * Removes an element from the hashtable.
+ * You must already have the address of the element to remove.
+ * \return The tommy_node::data field of the node removed.
+ */
+void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* node);
+
+/**
+ * Calls the specified function for each element in the hashtable.
+ *
+ * You cannot add or remove elements from the inside of the callback,
+ * but can use it to deallocate them.
+ *
+ * \code
+ * tommy_hashlin hashlin;
+ *
+ * // initializes the hashtable
+ * tommy_hashlin_init(&hashlin);
+ *
+ * ...
+ *
+ * // creates an object
+ * struct object* obj = malloc(sizeof(struct object));
+ *
+ * ...
+ *
+ * // insert it in the hashtable
+ * tommy_hashlin_insert(&hashlin, &obj->node, obj, tommy_inthash_u32(obj->value));
+ *
+ * ...
+ *
+ * // deallocates all the objects iterating the hashtable
+ * tommy_hashlin_foreach(&hashlin, free);
+ *
+ * // deallocates the hashtable
+ * tommy_hashlin_done(&hashlin);
+ * \endcode
+ */
+void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func);
+
+/**
+ * Calls the specified function with an argument for each element in the hashtable.
+ */
+void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* func, void* arg);
+
+/**
+ * Gets the number of elements, as tracked in the count field of the container.
+ */
+tommy_inline tommy_count_t tommy_hashlin_count(tommy_hashlin* hashlin)
+{
+ return hashlin->count;
+}
+
+/**
+ * Gets the size of allocated memory.
+ * It includes the size of the ::tommy_hashlin_node of the stored elements.
+ */
+tommy_size_t tommy_hashlin_memory_usage(tommy_hashlin* hashlin);
+
+#endif
+