diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:54:46 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:54:46 +0000 |
commit | cd7b005519ade8ab6c97fcb21590b71b7d1be6e3 (patch) | |
tree | c611a8d0cd5e8f68f41b8c2d16ba580e0f40a38d /third-party/tommyds | |
parent | Initial commit. (diff) | |
download | librtr-cd7b005519ade8ab6c97fcb21590b71b7d1be6e3.tar.xz librtr-cd7b005519ade8ab6c97fcb21590b71b7d1be6e3.zip |
Adding upstream version 0.8.0.upstream/0.8.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third-party/tommyds')
36 files changed, 7004 insertions, 0 deletions
diff --git a/third-party/tommyds/AUTHORS b/third-party/tommyds/AUTHORS new file mode 100644 index 0000000..23071a0 --- /dev/null +++ b/third-party/tommyds/AUTHORS @@ -0,0 +1,9 @@ +TommyDS AUTHORS +=============== + +The author of TommyDS is Andrea Mazzoleni. + +You can contact me sending an email at: + + amadvance@gmail.com + diff --git a/third-party/tommyds/HISTORY b/third-party/tommyds/HISTORY new file mode 100644 index 0000000..ae56bc5 --- /dev/null +++ b/third-party/tommyds/HISTORY @@ -0,0 +1,90 @@ +TommyDS HISTORY +=============== + +2.2 2018/02 +=========== + * Removed tommy_list_remove_head_not_empty() as not used and wrongly + implemented [Daniel Roethlisberger]. + +2.1 2016/11 +=========== + * Added a new hash function for strings: tommy_strhash_u32(). + * Added a new tree implementation. It's not intended to be fast but useful if + you need elements in order. + * Fixed some references to TOMMY_ARRAYBLKOF_SIZE, where instead + TOMMY_ARRAYBLK_SIZE was incorrectly used [Rocco]. + +2.0 2014/12 +=========== + * Fixed a Segmentation Fault bug in the trie_inplace container when inserting + duplicate elements. + * Faster array and hashlin implementation when accessing elements. + * Added new hashtable functions to iterate over all the elements. + * Added a new tommy_calloc() function used for allocating initialized memory. + If you redefined tommy_malloc(), likely you have to redefine also tommy_calloc(). + * Reached 100% code coverage in the regression test. + * Different source code organization. + * Added benchmark comparison with Binary Search Tesseract by Gregorius van + den Hoven. + +1.8 2013/12 +=========== + * Fixed build of tommy_arrayblk in C++. + * Changed the default node size of tommy_trie to fit a cache line of 64 bytes. + * Added benchmark comparison with STX BTree. + +1.7 2013/12 +=========== + * Extends tommy_hashlin_done() to work also if the hashtable is not empty. + * Removes the empty tommy_trie_done() because the real deallocation is done + by the allocator. + +1.6 2013/11 +=========== + * Added a new tommy_arrayblk and tommy_arrayblkof types to store elements + in an array minimizing memory occupation. + +1.5 2013/06 +=========== + * Fixed inline declaration to allow building with clang. + * Added a new tommy_arrayof type to store in an array elements of arbitrary + size. + +1.4 2013/03 +=========== + * Added benchmark comparison with Google BTree, and C++ map and unordered_map. + * Benchmark for Linux is now compiled with "-O3 -march=pentium4 -mtune=generic", + and the Windows one with "/Ox /GL /GS- /arch:SSE2". + +1.3 2013/02 +=========== + * Fixed a Segmentation Fault bug in the hashlin container if exact power + of 2 sizes were used. + * Removed some warnings with newer gcc. + * Minor documentation changes. + * Added benchmark comparison with the judy array implementation by Karl Malbrain. + +1.2 2012/05 +=========== + * Minor documentation changes. + * In the check application, added a speed comparison with the C qsort() + implementation. + +1.1 2012/05 +=========== + * Fixed the tommy_hashdyn_remove() function. Now it shrinks the hashtable if required. + * Minor documentation changes. + +1.0 2011/03 +=========== + * First official version of TommyDS. + * Added tommy_list_foreach functions. + +0.2 2011/03 +=========== + * Added tommy_array. A dynamic vector. + +0.1 2011/01 +=========== + * First release of Tommy. + diff --git a/third-party/tommyds/INSTALL b/third-party/tommyds/INSTALL new file mode 100644 index 0000000..40e55ff --- /dev/null +++ b/third-party/tommyds/INSTALL @@ -0,0 +1,8 @@ +TommyDS INSTALL +=============== + +TommyDS doesn't need any installation. + +You have only to import the required .c and .h files into your program +and use the them. + diff --git a/third-party/tommyds/LICENSE b/third-party/tommyds/LICENSE new file mode 100644 index 0000000..6c86157 --- /dev/null +++ b/third-party/tommyds/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/third-party/tommyds/Makefile b/third-party/tommyds/Makefile new file mode 100644 index 0000000..a2a3d36 --- /dev/null +++ b/third-party/tommyds/Makefile @@ -0,0 +1,219 @@ +############################################################################# +# Tommy Makefile + +# Version of TommyDS +VERSION = 2.2 + +# Build options for the check program +ifdef COVERAGE +CFLAGS = -O0 -g -fprofile-arcs -ftest-coverage +else +CFLAGS = -O3 -march=native -Wall -Wextra -Wshadow -Wuninitialized -Wpadded -Wcast-align -Wcast-qual -g +endif + +# Build options for the benchmark +# -std=gnu++0x required by Google btree +BENCHCXXFLAGS = -m32 -O3 -march=native -flto -fpermissive -std=gnu++0x -Wall -g + +# Programs +CC ?= gcc +CXX ?= g++ +OBJDUMP ?= objdump +UNAME = $(shell uname) + +# Linux +ifeq ($(UNAME),Linux) +LIB=-lrt +BENCHLIB=benchmark/lib/judy/libJudyL.a benchmark/lib/judy/libJudyMalloc.a +EXE= +O=.o +endif + +# Darwin +ifeq ($(UNAME),Darwin) +LIB= +EXE= +O=.o +endif + +# Windows +ifeq ($(UNAME),) +BENCHLIB=benchmark/lib/judy/src/judy.lib +EXE=.exe +O=.obj +endif + +#CHECK = ./tommybench -n 1000000 -d tommy-hashlin +CHECK = ./tommycheck + +DEP = \ + tommyds/tommyalloc.c \ + tommyds/tommyalloc.h \ + tommyds/tommyarray.c \ + tommyds/tommyarray.h \ + tommyds/tommyarrayof.c \ + tommyds/tommyarrayof.h \ + tommyds/tommyarrayblk.c \ + tommyds/tommyarrayblk.h \ + tommyds/tommyarrayblkof.c \ + tommyds/tommyarrayblkof.h \ + tommyds/tommy.c \ + tommyds/tommy.h \ + tommyds/tommyhash.c \ + tommyds/tommyhashdyn.c \ + tommyds/tommyhashdyn.h \ + tommyds/tommyhash.h \ + tommyds/tommyhashlin.c \ + tommyds/tommyhashlin.h \ + tommyds/tommyhashtbl.c \ + tommyds/tommyhashtbl.h \ + tommyds/tommylist.c \ + tommyds/tommylist.h \ + tommyds/tommytrie.c \ + tommyds/tommytrie.h \ + tommyds/tommytrieinp.c \ + tommyds/tommytrieinp.h \ + tommyds/tommytypes.h \ + tommyds/tommychain.h + +DEPTEST = \ + check.c \ + benchmark.cc + +all: tommycheck$(EXE) + +bench: tommybench$(EXE) + +tommy$(O): $(DEP) + $(CC) $(CFLAGS) -c tommyds/tommy.c -o tommy$(O) + -$(OBJDUMP) -S tommy$(O) > tommy.s + +tommycheck$(EXE): check.c tommy$(O) + $(CC) $(CFLAGS) check.c tommy$(O) -o tommycheck$(EXE) $(LIB) + +tommybench$(EXE): benchmark.cc $(DEP) + $(CXX) $(BENCHCXXFLAGS) benchmark.cc -o tommybench$(EXE) $(LIB) $(BENCHLIB) + +check: tommycheck$(EXE) + ./tommycheck$(EXE) + echo Check completed with success! + +lcov_reset: + lcov -d . -z + rm -f ./lcov.info + +lcov_capture: + lcov -d . --capture -o lcov.info + +lcov_html: + rm -rf ./cov + mkdir cov + genhtml -o ./cov lcov.info + +coverage: + $(MAKE) COVERAGE=1 tommycheck$(EXE) + $(MAKE) lcov_reset + ./tommycheck$(EXE) + $(MAKE) lcov_capture + $(MAKE) lcov_html + +valgrind: + valgrind \ + --tool=memcheck \ + --track-origins=yes \ + --read-var-info=yes \ + -v $(CHECK) \ + 2> valgrind.log + tail valgrind.log + +callgrind: + valgrind \ + --tool=callgrind \ + --dump-instr=yes \ + --trace-jump=yes \ + -v $(CHECK) \ + 2> callgrind.log + tail callgrind.log + +cachegrind: + valgrind \ + --tool=cachegrind \ + -v $(CHECK) \ + 2> cachegrind.log + tail cachegrind.log + +phony: + +graph: phony + cd benchmark && sh gr_all.sh + +doc: phony tommy.doxygen tommy.css $(DEP) + rm -rf doc + mkdir doc + cp -a benchmark/data/def doc/def + cp -a benchmark/data/other doc/other + cp -a benchmark/data/core_i5_650_3G2_linux doc/core_i5_650_3G2_linux + rm -f doc/*/*.lst + rm -f doc/*/*.gnu + doxygen tommy.doxygen + rm -f doc/doxygen.png + rm -f doc/tab_*.png + +web: phony tommyweb.doxygen tommy.css $(DEP) + rm -rf web + mkdir web + cp -a benchmark/data/def web/def + cp -a benchmark/data/other web/other + cp -a benchmark/data/core_i5_650_3G2_linux web/core_i5_650_3G2_linux + rm -f web/*/*.lst + rm -f web/*/*.gnu + doxygen tommyweb.doxygen + rm -f web/doxygen.png + rm -f web/tab_*.png + +clean: + rm -f *.log *.s *.lst *.o + rm -f *.ncb *.suo *.obj + rm -f *.gcno *.gcda lcov.info + rm -rf Debug Release x64 + rm -f callgrind.out.* + rm -f cachegrind.out.* + +distclean: clean + rm -f tommybench$(EXE) tommycheck$(EXE) + +maintainerclean: distclean + rm -rf doc web + +DIST=tommyds-$(VERSION) + +DISTFILES=\ + Makefile \ + README LICENSE AUTHORS INSTALL HISTORY \ + tommy.doxygen tommy.css tommy-header.html tommy-footer.html \ + benchmark.vcxproj benchmark.sln \ + benchmark.geany \ + benchmark.cc \ + check.c + +dist: + mkdir $(DIST) + mkdir $(DIST)/tommyds + cp $(DISTFILES) $(DIST) + cp $(DEP) $(DIST)/tommyds + cp $(DEPTEST) $(DIST) + cp -R doc $(DIST) + cp -R benchmark $(DIST)/benchmark + rm -f $(DIST)/benchmark/data/*/*.png + rm -rf $(DIST)/benchmark/data/test + rm -f $(DIST)/benchmark/arial.ttf + rm -f $(DIST).tar.gz + tar cfzo $(DIST).tar.gz $(DIST) + rm -f $(DIST).zip + zip -r $(DIST).zip $(DIST) + rm -r $(DIST) + +distcheck: dist + tar zxvf $(DIST).tar.gz + cd $(DIST) && make check + rm -rf $(DIST) diff --git a/third-party/tommyds/README b/third-party/tommyds/README new file mode 100644 index 0000000..c0fa3ec --- /dev/null +++ b/third-party/tommyds/README @@ -0,0 +1,31 @@ +TommyDS +======= + +TommyDS is a C library of array, hashtables and tries data structures, +designed for high performance and providing an easy to use interface. + +It's faster than all the similar libraries like rbtree, judy, goodledensehash, +khash, uthash, nedtries and others. + +The data structures provided are: + + tommy_list - A double linked list. + tommy_array - A linear array. It doesn't fragment + the heap. + tommy_arrayblk - A blocked linear array. It doesn't fragment + the heap and it minimizes the space occupation. + tommy_hashtable - A fixed size chained hashtable. + tommy_hashdyn - A dynamic chained hashtable. + tommy_hashlin - A linear chained hashtable. It doesn't have the + problem of the delay when resizing and it doesn't + fragment the heap. + tommy_trie - A trie optimized for cache utilization. + tommy_trie_inplace - A trie completely inplace. + +The documentation is available in HTML format in the doc/index.html file, +and directly in the .h files. + +The official site of TommyDS is: + + http://www.tommyds.it + diff --git a/third-party/tommyds/tommy.c b/third-party/tommyds/tommy.c new file mode 100644 index 0000000..a05013a --- /dev/null +++ b/third-party/tommyds/tommy.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommyhash.c" +#include "tommyalloc.c" +#include "tommyarray.c" +#include "tommyarrayof.c" +#include "tommyarrayblk.c" +#include "tommyarrayblkof.c" +#include "tommylist.c" +#include "tommytree.c" +#include "tommytrie.c" +#include "tommytrieinp.c" +#include "tommyhashtbl.c" +#include "tommyhashdyn.c" +#include "tommyhashlin.c" + diff --git a/third-party/tommyds/tommy.h b/third-party/tommyds/tommy.h new file mode 100644 index 0000000..08b0a75 --- /dev/null +++ b/third-party/tommyds/tommy.h @@ -0,0 +1,763 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \mainpage + * \section Introduction + * Tommy is a C library of array, hashtables and tries data structures, + * designed for high performance and providing an easy to use interface. + * + * It's <b>faster</b> than all the similar libraries like + * <a href="http://www.canonware.com/rb/">rbtree</a>, + * <a href="http://judy.sourceforge.net/">judy</a>, + * <a href="http://code.google.com/p/cpp-btree/">googlebtree</a>, + * <a href="http://panthema.net/2007/stx-btree/">stxbtree</a>, + * <a href="http://attractivechaos.awardspace.com/">khash</a>, + * <a href="http://uthash.sourceforge.net/">uthash</a>, + * <a href="http://www.nedprod.com/programs/portable/nedtries/">nedtrie</a>, + * <a href="http://code.google.com/p/judyarray/">judyarray</a>, + * <a href="http://concurrencykit.org/">concurrencykit</a> and others. + * Only <a href="http://code.google.com/p/google-sparsehash/">googledensehash</a> is a real competitor for Tommy. + * + * The data structures provided are: + * + * - ::tommy_list - A double linked list. + * - ::tommy_array, ::tommy_arrayof - A linear array. + * It doesn't fragment the heap. + * - ::tommy_arrayblk, ::tommy_arrayblkof - A blocked linear array. + * It doesn't fragment the heap and it minimizes the space occupation. + * - ::tommy_hashtable - A fixed size chained hashtable. + * - ::tommy_hashdyn - A dynamic chained hashtable. + * - ::tommy_hashlin - A linear chained hashtable. + * It doesn't have the problem of the delay when resizing and + * it doesn't fragment the heap. + * - ::tommy_trie - A trie optimized for cache utilization. + * - ::tommy_trie_inplace - A trie completely inplace. + * - ::tommy_tree - A tree to keep elements in order. + * + * The most interesting are ::tommy_array, ::tommy_hashdyn, ::tommy_hashlin, ::tommy_trie and ::tommy_trie_inplace. + * + * The official site of TommyDS is <a href="http://www.tommyds.it/">http://www.tommyds.it/</a>, + * + * \section Use + * + * All the Tommy containers are used to store pointers to generic objects, associated to an + * integer value, that could be a key or a hash value. + * + * They are semantically equivalent at the C++ <a href="http://www.cplusplus.com/reference/map/multimap/">multimap\<unsigned,void*\></a> + * and <a href="http://www.cplusplus.com/reference/unordered_map/unordered_multimap/">unordered_multimap\<unsigned,void*\></a>. + * + * An object, to be inserted in a container, should contain a node of type ::tommy_node. + * Inside this node is present a pointer to the object itself in the tommy_node::data field, + * the key used to identify the object in the tommy_node::key field, and other fields used + * by the containers. + * + * This is a typical object declaration: + * \code + * struct object { + * // other fields + * tommy_node node; + * }; + * \endcode + * + * To insert an object in a container, you have to provide the address of the embedded node, + * the address of the object and the value of the key. + * \code + * int key_to_insert = 1; + * struct object* obj = malloc(sizeof(struct object)); + * ... + * tommy_trie_insert(..., &obj->node, obj, key_to_insert); + * \endcode + * + * To search an object you have to provide the key and call the search function. + * \code + * int key_to_find = 1; + * struct object* obj = tommy_trie_search(..., key_to_find); + * if (obj) { + * // found + * } + * \endcode + * + * To access all the objects with the same keys you have to iterate over the bucket + * assigned at the specified key. + * \code + * int key_to_find = 1; + * tommy_trie_node* i = tommy_trie_bucket(..., key_to_find); + * + * while (i) { + * struct object* obj = i->data; // gets the object pointer + * + * printf("%d\n", obj->value); // process the object + * + * i = i->next; // goes to the next element + * } + * \endcode + * + * To remove an object you have to provide the key and call the remove function. + * \code + * int key_to_remove = 1; + * struct object* obj = tommy_trie_remove(..., key_to_remove); + * if (obj) { + * // found + * free(obj); // frees the object allocated memory + * } + * \endcode + * + * Dealing with hashtables, instead of the key, you have to provide the hash + * value of the object, and a compare function able to differentiate objects with + * the same hash value. + * To compute the hash value, you can use the generic tommy_hash_u32() function, + * or the specialized integer hash function tommy_inthash_u32(). + * + * \section Features + * + * Tommy is fast and easy to use. + * + * Tommy is portable to all platforms and operating systems. + * + * Tommy containers support multiple elements with the same key. + * + * Tommy containers keep the original insertion order of elements with equal keys. + * + * Tommy is released with the \ref license "2-clause BSD license". + * + * See the \ref design page for more details and limitations. + * + * \section Performance + * Here you can see some timings comparing with other notable implementations. + * The <i>Hit</i> graph shows the time required for searching random objects + * with a key. + * The <i>Change</i> graph shows the time required for searching, removing and + * reinsert random objects with a different key value. + * + * Times are expressed in nanoseconds for each element, and <b>lower is better</b>. + * + * To have some reference numbers, you can check <a href="https://gist.github.com/jboner/2841832">Latency numbers every programmer should know</a>. + * + * A complete analysis is available in the \ref benchmark page. + * + * <img src="def/img_random_hit.png"/> + * + * <img src="def/img_random_change.png"/> + * + * \page benchmark Tommy Benchmarks + * + * To evaluate Tommy performances, an extensive benchmark was done, + * comparing it to the best libraries of data structures available: + * + * Specifically we test: + * - ::tommy_hashtable - Fixed size chained hashtable. + * - ::tommy_hashdyn - Dynamic chained hashtable. + * - ::tommy_hashlin - Linear chained hashtable. + * - ::tommy_trie - Trie optimized for cache usage. + * - ::tommy_trie_inplace - Trie completely inplace. + * - <a href="http://www.canonware.com/rb/">rbtree</a> - Red-black tree by Jason Evans. + * - <a href="http://www.nedprod.com/programs/portable/nedtries/">nedtrie</a> - Binary trie inplace by Niall Douglas. + * - <a href="http://attractivechaos.awardspace.com/">khash</a> - Dynamic open addressing hashtable by Attractive Chaos. + * - <a href="http://uthash.sourceforge.net/">uthash</a> - Dynamic chaining hashtable by Troy D. Hanson. + * - <a href="http://judy.sourceforge.net/">judy</a> - Burst trie (JudyL) by Doug Baskins. + * - <a href="http://code.google.com/p/judyarray/">judyarray</a> - Burst trie by Karl Malbrain. + * - <a href="http://code.google.com/p/google-sparsehash/">googledensehash</a> - Dynamic open addressing hashtable by Craig Silverstein at Google. + * - <a href="http://code.google.com/p/cpp-btree/">googlebtree</a> - Btree by Google. + * - <a href="http://panthema.net/2007/stx-btree/">stxbtree</a> - STX Btree by Timo Bingmann. + * - <a href="http://www.cplusplus.com/reference/unordered_map/unordered_map/">c++unordered_map</a> - C++ STL unordered_map<> template. + * - <a href="http://www.cplusplus.com/reference/map/map/">c++map</a> - C++ STL map<> template. + * - <a href="https://sites.google.com/site/binarysearchcube/">tesseract</a> - Binary Search Tesseract by Gregorius van den Hoven. + * - <a href="https://code.google.com/p/sparsehash/source/browse/trunk/experimental/libchash.c">googlelibchash</a> - LibCHash by Craig Silverstein at Google. + * - <a href="https://github.com/fredrikwidlund/libdynamic">libdynamic</a> - Hash set by Fredrik Widlund. + * - <a href="http://concurrencykit.org/">concurrencykit</a> - Non-blocking hash set by Samy Al Bahra. + * + * Note that <em>googlelibchash</em> and <em>concurrencykit</em> are not shown in the graphs + * because they present a lot of spikes. See the \ref notes the end. + * + * \section thebenchmark The Benchmark + * + * The benchmark consists in storing a set of N pointers to objects and + * searching them using integer keys. + * + * Compared to the case of mapping integers to integers, mapping pointers to + * objects means that the pointers are also dereferenced, to simulate the + * object access, resulting in additional cache misses. + * This gives an advantage to implementations that store information in the + * objects itself, as the additional cache misses are already implicit. + * + * The test done are: + * - <b>Insert</b> Insert all the objects starting with an empty container. + * - <b>Change</b> Find and remove one object and reinsert it with a different key, repeated for all the objects. + * - <b>Hit</b> Find with success all the objects and dereference them. + * - <b>Miss</b> Find with failure all the objects. + * - <b>Remove</b> Remove all the objects and dereference them. + * + * The <i>Change</i>, <i>Hit</i> and <i>Miss</i> tests operate always with N + * objects in the containers. + * The <i>Insert</i> test starts with an empty container, and the <i>Remove</i> + * test ends with an empty container. + * The objects are always dereferenced, as we are supposing to use them. This + * happens even in the remove case, as we are supposing to deallocate them. + * + * All the objects are preallocated in the heap, and the allocation and deallocation + * time is not included in the test. + * + * The objects contain an integer <i>value</i> field used for consistency checks, + * an unused <i>payload</i> field of 16 bytes, and any other data required by the + * data structure. + * + * The objects are identified and stored using integer and unique <i>keys</i>. + * The key domain used is <strong>dense</strong>, and it's defined by the set + * of N even numbers starting from 0x80000000 to 0x80000000+2*N. + * + * The use of even numbers allows to have missing keys inside the domain for + * the <i>Miss</i> and <i>Change</i> test. + * In such tests it's used the key domain defined by the set of N odd numbers + * starting from 0x80000000+1 to 0x80000000+2*N+1. + * Note that using additional keys at the corners of the domain would have given + * an unfair advantage to tries and trees as they implicitly keep track of the + * maximum and minimum key value inserted. + * + * The use of the 0x80000000 base, allow to test a key domain not necessarily + * starting at 0. Using a 0 base would have given an unfair advantage to some + * implementation handling it as a special case. + * + * For all the hashtables the keys are hashed using the tommy_inthash_u32() + * function that ensures an uniform distribution. This hash function is also + * reversible, meaning that no collision is going to be caused by hashing the + * keys. For tries and trees the keys are not hashed, and used directly. + * + * The tests are repeated using keys in <i>Random</i> mode and in <i>Forward</i> + * mode. In the forward mode the key values are used in order from the lowest + * to the highest. + * In the random mode the key values are used in a completely random order. + * In the <i>Change</i> test in forward mode, each object is reinserted using + * the previous key incremented by 1. In random mode each object is reinserted + * using a completely different and uncorrelated key. + * + * The forward order advantages tries and trees as they use the key directly + * and they have a cache advantage on using consecutive keys. + * The random order advantages hashtables, as the hash function already + * randomizes the key. Usually real uses case are in between, and the random + * one is the worst case. + * + * \section result Results + * + * The most significant tests depend on your data usage model, but if in doubt, + * you can look at <i>Random Hit</i> and <i>Random Change</i>. + * They represent the real world worst condition. + * + * <img src="def/img_random_hit.png"/> + * + * In the <i>Random Hit</i> graph you can see a vertical split at the 100.000 + * elements limit. Before this limit the cache of modern processor is able to + * contains most of the data, and it allows a very fast access with almost all + * data structures. + * After this limit, the number of cache misses is the dominant factor, and + * the curve depends mainly on the number of cache-miss required. + * + * rbtree and nedtrie grow as log2(N) as they have two branches on each node, + * ::tommy_trie_inplace grows as log4(N), ::tommy_trie as log8(N) and hashtables + * are almost constant and don't grow. + * For ::tommy_trie_inplace and ::tommy_trie you can change the grow curve + * configuring a different number of branches for node. + * + * <img src="def/img_random_change.png"/> + * + * The <i>Random Change</i> graph confirms the vertical split at the 100.000 + * elements limit. It also show that hashtables are almost unbeatable with a + * random access. + * + * \section random Random order + * Here you can see the whole <i>Random</i> test results in different platforms. + * + * In the <i>Random</i> test, hashtables are almost always winning, seconds are + * tries, and as last trees. + * + * The best choices are ::tommy_hashdyn, ::tommy_hashlin, and googledensehash, + * with ::tommy_hashlin having the advantage to be real-time friendly and not + * increasing the heap fragmentation. + * <table border="0"> + * <tr><td> + * <img src="core_i5_650_3G2_linux/img_random_insert.png"/> + * </td></tr><tr><td> + * <img src="core_i5_650_3G2_linux/img_random_hit.png"/> + * </td></tr><tr><td> + * <img src="core_i5_650_3G2_linux/img_random_miss.png"/> + * </td></tr><tr><td> + * <img src="core_i5_650_3G2_linux/img_random_change.png"/> + * </td></tr><tr><td> + * <img src="core_i5_650_3G2_linux/img_random_remove.png"/> + * </td></tr> + * </table> + * + * \section forward Forward order + * Here you can see the whole <i>Forward</i> test results in different platforms. + * + * In the <i>Forward</i> test, tries are the winners. Hashtables are competitive + * until the cache limit, then they lose against tries. Trees are the slowest. + * + * The best choices are ::tommy_trie and ::tommy_trie_inplace, where ::tommy_trie is + * a bit faster, and ::tommy_trie_inplace doesn't require a custom allocator. + * + * Note that also hashtables are faster in forward order than random. This may + * seem a bit surprising as the hash function randomizes the access even with + * consecutive keys. This happens because the objects are allocated in consecutive + * memory, and accessing them in order, improves the cache utilization, even if + * the hashed key is random. + * + * Note that you can make hashtables to reach tries performance tweaking the hash + * function to put near keys allocated nearby. + * This is possible if you know in advance the distribution of keys. + * For example, in the benchmark you could use something like: + * \code + * #define hash(v) tommy_inthash_u32(v & ~0xF) + (v & 0xF) + * \endcode + * and make keys that differ only by the lowest bits to have hashes with the same + * property, resulting in objects stored nearby, and improving cache utilization. + * + * <table border="0"> + * <tr><td> + * <img src="core_i5_650_3G2_linux/img_forward_insert.png"/> + * </td></tr><tr><td> + * <img src="core_i5_650_3G2_linux/img_forward_hit.png"/> + * </td></tr><tr><td> + * <img src="core_i5_650_3G2_linux/img_forward_miss.png"/> + * </td></tr><tr><td> + * <img src="core_i5_650_3G2_linux/img_forward_change.png"/> + * </td></tr><tr><td> + * <img src="core_i5_650_3G2_linux/img_forward_remove.png"/> + * </td></tr> + * </table> + * + * \section size Size + * Here you can see the memory usage of the different data structures. + * <table border="0"> + * <tr><td> + * <img src="core_i5_650_3G2_linux/img_random_size.png"/> + * </td></tr> + * </table> + * + * \section code Code + * + * The compilers used in the benchmark are: + * - <b>gcc 4.9.2</b> in Linux with options: -O3 -march=nehalem + * - <b>Visual C 2012</b> in Windows with options: /Ox /Oy- /GL /GS- /arch:SSE2 + * + * The following is pseudo code of the benchmark used. In this case it's written + * for the C++ unordered_map. + * + * \code + * #define N 10000000 // Number of elements + * #define PAYLOAD 16 // Size of the object + * + * // Basic object inserted in the colletion + * struct obj { + * unsigned value; // Key used for searching + * char payload[PAYLOAD]; + * }; + * + * // Custom hash function to avoid to use the STL one + * class custom_hash { + * public: + * unsigned operator()(unsigned key) const { return tommy_inthash_u32(key); } + * }; + * + * // Map collection from "unsigned" to "pointer to object" + * typedef std::unordered_map<unsigned, obj*, custom_hash> bag_t; + * bag_t bag; + * + * // Preallocate objects + * obj* OBJ = new obj[N]; + * + * // Keys used for inserting and searching elements + * unsigned INSERT[N]; + * unsigned SEARCH[N]; + * + * // Initialize the keys + * for(i=0;i<N;++i) { + * INSERT[i] = 0x80000000 + i * 2; + * SEARCH[i] = 0x80000000 + i * 2; + * } + * + * // If random order is required, shuffle the keys with Fisher-Yates + * // The two key orders are not correlated + * if (test_random) { + * std::random_shuffle(INSERT, INSERT + N); + * std::random_shuffle(SEARCH, SEARCH + N); + * } + * \endcode + * + * \subsection insertion Insert benchmark + * \code + * for(i=0;i<N;++i) { + * // Setup the element to insert + * unsigned key = INSERT[i]; + * obj* element = &OBJ[i]; + * element->value = key; + * + * // Insert it + * bag[key] = element; + * } + * \endcode + * + * \subsection change Change benchmark + * \code + * for(i=0;i<N;++i) { + * // Search the element + * unsigned key = SEARCH[i]; + * bag_t::iterator j = bag.find(key); + * if (j == bag.end()) + * abort(); + * + * // Remove it + * obj* element = j->second; + * bag.erase(j); + * + * // Reinsert the element with a new key + * // Use +1 in the key to ensure that the new key is unique + * key = INSERT[i] + 1; + * element->value = key; + * bag[key] = element; + * } + * \endcode + * + * \subsection hit Hit benchmark + * \code + * for(i=0;i<N;++i) { + * // Search the element + * // Use a different key order than insertion + * // Use +1 in the key because we run after the "Change" test + * unsigned key = SEARCH[i] + 1; + * bag_t::const_iterator j = bag.find(key); + * if (j == bag.end()) + * abort(); + * + * // Ensure that it's the correct element. + * // This operation is like using the object after finding it, + * // and likely involves a cache-miss operation. + * obj* element = j->second; + * if (element->value != key) + * abort(); + * } + * \endcode + * + * \subsection miss Miss benchmark + * \code + * for(i=0;i<N;++i) { + * // Search the element + * // All the keys are now shifted by +1 by the "Change" test, and we'll find nothing + * unsigned key = SEARCH[i]; + * bag_t::const_iterator j = bag.find(key); + * if (j != bag.end()) + * abort(); + * } + * \endcode + * + * \subsection remove Remove benchmark + * \code + * for(i=0;i<N;++i) { + * // Search the element + * // Use +1 in the key because we run after the "Change" test + * unsigned key = SEARCH[i] + 1; + * bag_t::iterator j = bag.find(key); + * if (j == bag.end()) + * abort(); + * + * // Remove it + * bag.erase(j); + * + * // Ensure that it's the correct element. + * obj* element = j->second; + * if (element->value != key) + * abort(); + * } + * \endcode + * + * \section others Other benchmarks + * Here some links to other performance comparison: + * + * <a href="http://attractivechaos.wordpress.com/2008/08/28/comparison-of-hash-table-libraries/">Comparison of Hash Table Libraries</a> + * + * <a href="http://incise.org/hash-table-benchmarks.html">Hash Table Benchmarks</a> + * + * \section notes Notes + * + * Here some notes about the data structure tested not part of Tommy. + * + * \subsection googlelibchash Google C libchash + * It's the C implementation located in the <i>experimental/</i> directory of the googlesparsehash archive. + * It has very bad performances in the <i>Change</i> test for some N values. + * See this <a href="other/googlelibchash_problem.png">graph</a> with a lot of spikes. + * The C++ version doesn't suffer of this problem. + * + * \subsection googledensehash Google C++ densehash + * It doesn't release memory on deletion. + * To avoid an unfair advantage in the <i>Remove</i> test, we force a periodic + * resize calling resize(0) after any deallocation. + * The resize is executed when the load factor is lower than 20%. + * + * \subsection khash khash + * It doesn't release memory on deletion. This gives an unfair advantage on the <i>Remove</i> test. + * + * \subsection nedtrie nedtrie + * I've found a crash bug when inserting keys with the 0 value. + * The <a href="https://github.com/ned14/nedtries/commit/21039696f27db4ffac70a82f89dc5d00ae74b332">fix</a> of this issue is now in the nedtries github. + * We do not use the C++ implementation as it doesn't compile with gcc 4.4.3. + * + * \subsection judy Judy + * Sometimes it has bad performances in some specific platform + * and for some specific input data size. + * This makes difficult to predict the performance, as it is usually good until + * you get one of these cases. + * See for example this <a href="other/judy_problem.png">graph</a> with a big replicable spike at 50.000 elements. + * + * \subsection ck Concurrency Kit + * It has very bad performances in the <i>Change</i> test for some N values. + * See this <a href="other/ck_problem.png">graph</a> with a lot of spikes. + * + * \page multiindex Tommy Multi Indexing + * + * Tommy provides only partial iterator support with the "foreach" functions. + * If you need real iterators you have to insert all the objects also in a ::tommy_list, + * and use the list as iterator. + * + * This technique allows to keep track of the insertion order with the list, + * and provides more search possibilities using different data structures for + * different search keys. + * + * See the next example, for a objects inserted in a ::tommy_list, and in + * two ::tommy_hashdyn using different keys. + * + * \code + * struct object { + * // data fields + * int value_0; + * int value_1; + * + * // for containers + * tommy_node list_node; // node for the list + * tommy_node hash_node_0; // node for the first hash + * tommy_node hash_node_1; // node for the second hash + * }; + * + * // search function for value_1 + * int search_1(const void* arg, const void* obj) + * { + * return *(const int*)arg != ((const struct object*)obj)->value_1; + * } + * + * tommy_hashdyn hash_0; + * tommy_hashdyn hash_1; + * tommy_list list; + * + * // initializes the hash tables and the list + * tommy_hashdyn_init(&hash_0); + * tommy_hashdyn_init(&hash_1); + * tommy_list_init(&list); + * + * ... + * + * // creates an object and inserts it + * struct object* obj = malloc(sizeof(struct object)); + * obj->value_0 = ...; + * obj->value_1 = ...; + * // inserts in the first hash table + * tommy_hashdyn_insert(&hash_0, &obj->hash_node_0, obj, tommy_inthash_u32(obj->value_0)); + * // inserts in the second hash table + * tommy_hashdyn_insert(&hash_1, &obj->hash_node_1, obj, tommy_inthash_u32(obj->value_1)); + * // inserts in the list + * tommy_list_insert_tail(&list, &obj->list_node, obj); + * + * ... + * + * // searches an object by value_1 and deletes it + * int value_to_find = ...; + * struct object* obj = tommy_hashdyn_search(&hash_1, search_1, &value_to_find, tommy_inthash_u32(value_to_find)); + * if (obj) { + * // if found removes all the references + * tommy_hashdyn_remove_existing(&hash_0, &obj->hash_node_0); + * tommy_hashdyn_remove_existing(&hash_1, &obj->hash_node_1); + * tommy_list_remove_existing(&list, &obj->list_node); + * } + * + * ... + * + * // complex iterator logic + * tommy_node* i = tommy_list_head(&list); + * while (i != 0) { + * // get the object + * struct object* obj = i->data; + * ... + * // go to the next element + * i = i->next; + * ... + * // go to the prev element + * i = i->prev; + * ... + * } + * + * ... + * + * // deallocates the objects iterating the list + * tommy_list_foreach(&list, free); + * + * // deallocates the hash tables + * tommy_hashdyn_done(&hash_0); + * tommy_hashdyn_done(&hash_1); + * \endcode + * + * \page design Tommy Design + * + * Tommy is designed to fulfill the need of generic data structures for the + * C language, providing at the same time high performance and a clean + * and easy to use interface. + * + * \section testing Testing + * + * Extensive and automated tests with the runtime checker <a href="http://valgrind.org/">valgrind</a> + * and the static analyzer <a href="http://clang-analyzer.llvm.org/">clang</a> + * are done to ensure the correctness of the library. + * + * The test has a <a href="http://www.tommyds.it/cov/tommyds/tommyds">code coverage of 100%</a>, + * measured with <a href="http://ltp.sourceforge.net/coverage/lcov.php">lcov</a>. + * + * \section Limitations + * + * Tommy is not thread safe. You have always to provide thread safety using + * locks before calling any Tommy functions. + * + * Tommy doesn't provide iterators over the implicit order defined by the data + * structures. To iterate on elements you must insert them also into a ::tommy_list, + * and use the list as iterator. See the \ref multiindex example for more details. + * Note that this is a real limitation only for ::tommy_trie, as it's the only + * data structure defining an useable order. + * + * Tommy doesn't provide an error reporting mechanism for a malloc() failure. + * You have to provide it redefining malloc() if you expect it to fail. + * + * Tommy assumes to never have more than 2^32-1 elements in a container. + * + * \section compromise Compromises + * + * Finding the right balance between efficency and easy to use required some + * comprimises. Most of them are on memory efficency, and were done to avoid to + * cripple the interface. + * + * The following is a list of such decisions. + * + * \subsection multi_key Multi key + * All the Tommy containers support the insertion of multiple elements with + * the same key, adding in each node a list of equal elements. + * + * They are the equivalent at the C++ associative containers <a href="http://www.cplusplus.com/reference/map/multimap/">multimap\<unsigned,void*\></a> + * and <a href="http://www.cplusplus.com/reference/unordered_map/unordered_multimap/">unordered_multimap\<unsigned,void*\></a> + * that allow duplicates of the same key. + * + * A more memory conservative approach is to not allow duplicated elements, + * removing the need of this list. + * + * \subsection data_pointer Data pointer + * The tommy_node::data field is present to allow search and remove functions to return + * directly a pointer to the element stored in the container. + * + * A more memory conservative approach is to require the user to compute + * the element pointer from the embedded node with a fixed displacement. + * For an example, see the Linux Kernel declaration of + * <a href="http://lxr.free-electrons.com/ident?i=container_of">container_of()</a>. + * + * \subsection insertion_order Insertion order + * The list used for collisions is double linked to allow + * insertion of elements at the end of the list to keep the + * insertion order of equal elements. + * + * A more memory conservative approach is to use a single linked list, + * inserting elements only at the start of the list, losing the + * original insertion order. + * + * \subsection zero_list Zero terminated list + * The 0 terminated format of tommy_node::next is present to provide a forward + * iterator terminating in 0. This allows the user to write a simple iteration + * loop over the list of elements in the same bucket. + * + * A more efficient approach is to use a circular list, because operating on nodes + * in a circular list doesn't requires to manage the special terminating case when + * adding or removing elements. + * + * \page license Tommy License + * Tommy is released with a <i>2-clause BSD license</i>. + * + * \code + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * \endcode + */ + +/** \file + * All in one include for Tommy. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tommytypes.h" +#include "tommyhash.h" +#include "tommyalloc.h" +#include "tommyarray.h" +#include "tommyarrayof.h" +#include "tommyarrayblk.h" +#include "tommyarrayblkof.h" +#include "tommylist.h" +#include "tommytree.h" +#include "tommytrie.h" +#include "tommytrieinp.h" +#include "tommyhashtbl.h" +#include "tommyhashdyn.h" +#include "tommyhashlin.h" + +#ifdef __cplusplus +} +#endif + diff --git a/third-party/tommyds/tommyalloc.c b/third-party/tommyds/tommyalloc.c new file mode 100644 index 0000000..0da604c --- /dev/null +++ b/third-party/tommyds/tommyalloc.c @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommyalloc.h" + +/******************************************************************************/ +/* allocator */ + +/** + * Basic allocation segment. + * Smaller of a memory page, to allow also a little heap overread. + * The heap manager may put it in a single memory page. + */ +#define TOMMY_ALLOCATOR_BLOCK_SIZE (4096 - 64) + +void tommy_allocator_init(tommy_allocator* alloc, tommy_size_t block_size, tommy_size_t align_size) +{ + /* setup the minimal alignment */ + if (align_size < sizeof(void*)) + align_size = sizeof(void*); + + /* ensure that the block_size keeps the alignment */ + if (block_size % align_size != 0) + block_size += align_size - block_size % align_size; + + alloc->block_size = block_size; + alloc->align_size = align_size; + + alloc->count = 0; + alloc->free_block = 0; + alloc->used_segment = 0; +} + +/** + * Reset the allocator and free all. + */ +static void allocator_reset(tommy_allocator* alloc) +{ + tommy_allocator_entry* block = alloc->used_segment; + + while (block) { + tommy_allocator_entry* block_next = block->next; + tommy_free(block); + block = block_next; + } + + alloc->count = 0; + alloc->free_block = 0; + alloc->used_segment = 0; +} + +void tommy_allocator_done(tommy_allocator* alloc) +{ + allocator_reset(alloc); +} + +void* tommy_allocator_alloc(tommy_allocator* alloc) +{ + void* ptr; + + /* if no free block available */ + if (!alloc->free_block) { + tommy_uintptr_t off, mis; + tommy_size_t size; + char* data; + tommy_allocator_entry* segment; + + /* default allocation size */ + size = TOMMY_ALLOCATOR_BLOCK_SIZE; + + /* ensure that we can allocate at least one block */ + if (size < sizeof(tommy_allocator_entry) + alloc->align_size + alloc->block_size) + size = sizeof(tommy_allocator_entry) + alloc->align_size + alloc->block_size; + + data = tommy_cast(char*, tommy_malloc(size)); + segment = (tommy_allocator_entry*)data; + + /* put in the segment list */ + segment->next = alloc->used_segment; + alloc->used_segment = segment; + data += sizeof(tommy_allocator_entry); + + /* align if not aligned */ + off = (tommy_uintptr_t)data; + mis = off % alloc->align_size; + if (mis != 0) { + data += alloc->align_size - mis; + size -= alloc->align_size - mis; + } + + /* insert in free list */ + do { + tommy_allocator_entry* free_block = (tommy_allocator_entry*)data; + free_block->next = alloc->free_block; + alloc->free_block = free_block; + + data += alloc->block_size; + size -= alloc->block_size; + } while (size >= alloc->block_size); + } + + /* remove one from the free list */ + ptr = alloc->free_block; + alloc->free_block = alloc->free_block->next; + + ++alloc->count; + + return ptr; +} + +void tommy_allocator_free(tommy_allocator* alloc, void* ptr) +{ + tommy_allocator_entry* free_block = tommy_cast(tommy_allocator_entry*, ptr); + + /* put it in the free list */ + free_block->next = alloc->free_block; + alloc->free_block = free_block; + + --alloc->count; +} + +tommy_size_t tommy_allocator_memory_usage(tommy_allocator* alloc) +{ + return alloc->count * (tommy_size_t)alloc->block_size; +} + diff --git a/third-party/tommyds/tommyalloc.h b/third-party/tommyds/tommyalloc.h new file mode 100644 index 0000000..7a9afea --- /dev/null +++ b/third-party/tommyds/tommyalloc.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Allocator of fixed size blocks. + */ + +#ifndef __TOMMYALLOC_H +#define __TOMMYALLOC_H + +#include "tommytypes.h" + +/******************************************************************************/ +/* allocator */ + +/** \internal + * Allocator entry. + */ +struct tommy_allocator_entry_struct { + struct tommy_allocator_entry_struct* next; /**< Pointer to the next entry. 0 for last. */ +}; +typedef struct tommy_allocator_entry_struct tommy_allocator_entry; + +/** + * Allocator of fixed size blocks. + */ +typedef struct tommy_allocator_struct { + struct tommy_allocator_entry_struct* free_block; /**< List of free blocks. */ + struct tommy_allocator_entry_struct* used_segment; /**< List of allocated segments. */ + tommy_size_t block_size; /**< Block size. */ + tommy_size_t align_size; /**< Alignment size. */ + tommy_count_t count; /**< Number of allocated elements. */ +} tommy_allocator; + +/** + * Initializes the allocator. + * \param alloc Allocator to initialize. + * \param block_size Size of the block to allocate. + * \param align_size Minimum alignment requirement. No less than sizeof(void*). + */ +void tommy_allocator_init(tommy_allocator* alloc, tommy_size_t block_size, tommy_size_t align_size); + +/** + * Deinitialize the allocator. + * It also releases all the allocated memory to the heap. + * \param alloc Allocator to deinitialize. + */ +void tommy_allocator_done(tommy_allocator* alloc); + +/** + * Allocates a block. + * \param alloc Allocator to use. + */ +void* tommy_allocator_alloc(tommy_allocator* alloc); + +/** + * Deallocates a block. + * You must use the same allocator used in the tommy_allocator_alloc() call. + * \param alloc Allocator to use. + * \param ptr Block to free. + */ +void tommy_allocator_free(tommy_allocator* alloc, void* ptr); + +/** + * Gets the size of allocated memory. + * \param alloc Allocator to use. + */ +tommy_size_t tommy_allocator_memory_usage(tommy_allocator* alloc); + +#endif + diff --git a/third-party/tommyds/tommyarray.c b/third-party/tommyds/tommyarray.c new file mode 100644 index 0000000..a8d946b --- /dev/null +++ b/third-party/tommyds/tommyarray.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2011, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommyarray.h" + +/******************************************************************************/ +/* array */ + +void tommy_array_init(tommy_array* array) +{ + tommy_uint_t i; + + /* fixed initial size */ + array->bucket_bit = TOMMY_ARRAY_BIT; + array->bucket_max = 1 << array->bucket_bit; + array->bucket[0] = tommy_cast(void**, tommy_calloc(array->bucket_max, sizeof(void*))); + for (i = 1; i < TOMMY_ARRAY_BIT; ++i) + array->bucket[i] = array->bucket[0]; + + array->count = 0; +} + +void tommy_array_done(tommy_array* array) +{ + tommy_uint_t i; + + tommy_free(array->bucket[0]); + for (i = TOMMY_ARRAY_BIT; i < array->bucket_bit; ++i) { + void** segment = array->bucket[i]; + tommy_free(&segment[((tommy_ptrdiff_t)1) << i]); + } +} + +void tommy_array_grow(tommy_array* array, tommy_count_t count) +{ + if (array->count >= count) + return; + array->count = count; + + while (count > array->bucket_max) { + void** segment; + + /* allocate one more segment */ + segment = tommy_cast(void**, tommy_calloc(array->bucket_max, sizeof(void*))); + + /* store it adjusting the offset */ + /* cast to ptrdiff_t to ensure to get a negative value */ + array->bucket[array->bucket_bit] = &segment[-(tommy_ptrdiff_t)array->bucket_max]; + + ++array->bucket_bit; + array->bucket_max = 1 << array->bucket_bit; + } +} + +tommy_size_t tommy_array_memory_usage(tommy_array* array) +{ + return array->bucket_max * (tommy_size_t)sizeof(void*); +} + diff --git a/third-party/tommyds/tommyarray.h b/third-party/tommyds/tommyarray.h new file mode 100644 index 0000000..c04193f --- /dev/null +++ b/third-party/tommyds/tommyarray.h @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2011, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Dynamic array based on segments of exponential growing size. + * + * This array is able to grow dynamically upon request, without any reallocation. + * + * The grow operation involves an allocation of a new array segment, without reallocating + * the already used memory, and then not increasing the heap fragmentation. + * This also implies that the address of the stored elements never change. + * + * Allocated segments grow in size exponentially. + */ + +#ifndef __TOMMYARRAY_H +#define __TOMMYARRAY_H + +#include "tommytypes.h" + +#include <assert.h> /* for assert */ + +/******************************************************************************/ +/* array */ + +/** + * Initial and minimal size of the array expressed as a power of 2. + * The initial size is 2^TOMMY_ARRAY_BIT. + */ +#define TOMMY_ARRAY_BIT 6 + +/** \internal + * Max number of elements as a power of 2. + */ +#define TOMMY_ARRAY_BIT_MAX 32 + +/** + * Array container type. + * \note Don't use internal fields directly, but access the container only using functions. + */ +typedef struct tommy_array_struct { + void** bucket[TOMMY_ARRAY_BIT_MAX]; /**< Dynamic array of buckets. */ + tommy_uint_t bucket_bit; /**< Bits used in the bit mask. */ + tommy_count_t bucket_max; /**< Number of buckets. */ + tommy_count_t count; /**< Number of initialized elements in the array. */ +} tommy_array; + +/** + * Initializes the array. + */ +void tommy_array_init(tommy_array* array); + +/** + * Deinitializes the array. + */ +void tommy_array_done(tommy_array* array); + +/** + * Grows the size up to the specified value. + * All the new elements in the array are initialized with the 0 value. + */ +void tommy_array_grow(tommy_array* array, tommy_count_t size); + +/** + * Gets a reference of the element at the specified position. + * You must be sure that space for this position is already + * allocated calling tommy_array_grow(). + */ +tommy_inline void** tommy_array_ref(tommy_array* array, tommy_count_t pos) +{ + tommy_uint_t bsr; + + assert(pos < array->count); + + /* get the highest bit set, in case of all 0, return 0 */ + bsr = tommy_ilog2_u32(pos | 1); + + return &array->bucket[bsr][pos]; +} + +/** + * Sets the element at the specified position. + * You must be sure that space for this position is already + * allocated calling tommy_array_grow(). + */ +tommy_inline void tommy_array_set(tommy_array* array, tommy_count_t pos, void* element) +{ + *tommy_array_ref(array, pos) = element; +} + +/** + * Gets the element at the specified position. + * You must be sure that space for this position is already + * allocated calling tommy_array_grow(). + */ +tommy_inline void* tommy_array_get(tommy_array* array, tommy_count_t pos) +{ + return *tommy_array_ref(array, pos); +} + +/** + * Grows and inserts a new element at the end of the array. + */ +tommy_inline void tommy_array_insert(tommy_array* array, void* element) +{ + tommy_count_t pos = array->count; + + tommy_array_grow(array, pos + 1); + + tommy_array_set(array, pos, element); +} + +/** + * Gets the initialized size of the array. + */ +tommy_inline tommy_count_t tommy_array_size(tommy_array* array) +{ + return array->count; +} + +/** + * Gets the size of allocated memory. + */ +tommy_size_t tommy_array_memory_usage(tommy_array* array); + +#endif + diff --git a/third-party/tommyds/tommyarrayblk.c b/third-party/tommyds/tommyarrayblk.c new file mode 100644 index 0000000..d4dda76 --- /dev/null +++ b/third-party/tommyds/tommyarrayblk.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2013, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommyarrayblk.h" + +/******************************************************************************/ +/* array */ + +void tommy_arrayblk_init(tommy_arrayblk* array) +{ + tommy_array_init(&array->block); + + array->count = 0; +} + +void tommy_arrayblk_done(tommy_arrayblk* array) +{ + tommy_count_t i; + + for (i = 0; i < tommy_array_size(&array->block); ++i) + tommy_free(tommy_array_get(&array->block, i)); + + tommy_array_done(&array->block); +} + +void tommy_arrayblk_grow(tommy_arrayblk* array, tommy_count_t count) +{ + tommy_count_t block_max; + tommy_count_t block_mac; + + if (array->count >= count) + return; + array->count = count; + + block_max = (count + TOMMY_ARRAYBLK_SIZE - 1) / TOMMY_ARRAYBLK_SIZE; + block_mac = tommy_array_size(&array->block); + + if (block_mac < block_max) { + /* grow the block array */ + tommy_array_grow(&array->block, block_max); + + /* allocate new blocks */ + while (block_mac < block_max) { + void** ptr = tommy_cast(void**, tommy_calloc(TOMMY_ARRAYBLK_SIZE, sizeof(void*))); + + /* set the new block */ + tommy_array_set(&array->block, block_mac, ptr); + + ++block_mac; + } + } +} + +tommy_size_t tommy_arrayblk_memory_usage(tommy_arrayblk* array) +{ + return tommy_array_memory_usage(&array->block) + tommy_array_size(&array->block) * TOMMY_ARRAYBLK_SIZE * sizeof(void*); +} + diff --git a/third-party/tommyds/tommyarrayblk.h b/third-party/tommyds/tommyarrayblk.h new file mode 100644 index 0000000..20d0525 --- /dev/null +++ b/third-party/tommyds/tommyarrayblk.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2013, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Dynamic array based on blocks of fixed size. + * + * This array is able to grow dynamically upon request, without any reallocation. + * + * The grow operation involves an allocation of a new array block, without reallocating + * the already used memory, and then not increasing the heap fragmentation, + * and minimize the space occupation. + * This also implies that the address of the stored elements never change. + * + * Allocated blocks are always of the same fixed size of 4 Ki pointers. + */ + +#ifndef __TOMMYARRAYBLK_H +#define __TOMMYARRAYBLK_H + +#include "tommytypes.h" +#include "tommyarray.h" + +#include <assert.h> /* for assert */ + +/******************************************************************************/ +/* array */ + +/** + * Elements for each block. + */ +#define TOMMY_ARRAYBLK_SIZE (4 * 1024) + +/** + * Array container type. + * \note Don't use internal fields directly, but access the container only using functions. + */ +typedef struct tommy_arrayblk_struct { + tommy_array block; /**< Array of blocks. */ + tommy_count_t count; /**< Number of initialized elements in the array. */ +} tommy_arrayblk; + +/** + * Initializes the array. + */ +void tommy_arrayblk_init(tommy_arrayblk* array); + +/** + * Deinitializes the array. + */ +void tommy_arrayblk_done(tommy_arrayblk* array); + +/** + * Grows the size up to the specified value. + * All the new elements in the array are initialized with the 0 value. + */ +void tommy_arrayblk_grow(tommy_arrayblk* array, tommy_count_t size); + +/** + * Gets a reference of the element at the specified position. + * You must be sure that space for this position is already + * allocated calling tommy_arrayblk_grow(). + */ +tommy_inline void** tommy_arrayblk_ref(tommy_arrayblk* array, tommy_count_t pos) +{ + void** ptr; + + assert(pos < array->count); + + ptr = tommy_cast(void**, tommy_array_get(&array->block, pos / TOMMY_ARRAYBLK_SIZE)); + + return &ptr[pos % TOMMY_ARRAYBLK_SIZE]; +} + +/** + * Sets the element at the specified position. + * You must be sure that space for this position is already + * allocated calling tommy_arrayblk_grow(). + */ +tommy_inline void tommy_arrayblk_set(tommy_arrayblk* array, tommy_count_t pos, void* element) +{ + *tommy_arrayblk_ref(array, pos) = element; +} + +/** + * Gets the element at the specified position. + * You must be sure that space for this position is already + * allocated calling tommy_arrayblk_grow(). + */ +tommy_inline void* tommy_arrayblk_get(tommy_arrayblk* array, tommy_count_t pos) +{ + return *tommy_arrayblk_ref(array, pos); +} + +/** + * Grows and inserts a new element at the end of the array. + */ +tommy_inline void tommy_arrayblk_insert(tommy_arrayblk* array, void* element) +{ + tommy_count_t pos = array->count; + + tommy_arrayblk_grow(array, pos + 1); + + tommy_arrayblk_set(array, pos, element); +} + +/** + * Gets the initialized size of the array. + */ +tommy_inline tommy_count_t tommy_arrayblk_size(tommy_arrayblk* array) +{ + return array->count; +} + +/** + * Gets the size of allocated memory. + */ +tommy_size_t tommy_arrayblk_memory_usage(tommy_arrayblk* array); + +#endif + diff --git a/third-party/tommyds/tommyarrayblkof.c b/third-party/tommyds/tommyarrayblkof.c new file mode 100644 index 0000000..8a491a0 --- /dev/null +++ b/third-party/tommyds/tommyarrayblkof.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2013, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommyarrayblkof.h" + +/******************************************************************************/ +/* array */ + +void tommy_arrayblkof_init(tommy_arrayblkof* array, tommy_size_t element_size) +{ + tommy_array_init(&array->block); + + array->element_size = element_size; + array->count = 0; +} + +void tommy_arrayblkof_done(tommy_arrayblkof* array) +{ + tommy_count_t i; + + for (i = 0; i < tommy_array_size(&array->block); ++i) + tommy_free(tommy_array_get(&array->block, i)); + + tommy_array_done(&array->block); +} + +void tommy_arrayblkof_grow(tommy_arrayblkof* array, tommy_count_t count) +{ + tommy_count_t block_max; + tommy_count_t block_mac; + + if (array->count >= count) + return; + array->count = count; + + block_max = (count + TOMMY_ARRAYBLKOF_SIZE - 1) / TOMMY_ARRAYBLKOF_SIZE; + block_mac = tommy_array_size(&array->block); + + if (block_mac < block_max) { + /* grow the block array */ + tommy_array_grow(&array->block, block_max); + + /* allocate new blocks */ + while (block_mac < block_max) { + void** ptr = tommy_cast(void**, tommy_calloc(TOMMY_ARRAYBLKOF_SIZE, array->element_size)); + + /* set the new block */ + tommy_array_set(&array->block, block_mac, ptr); + + ++block_mac; + } + } +} + +tommy_size_t tommy_arrayblkof_memory_usage(tommy_arrayblkof* array) +{ + return tommy_array_memory_usage(&array->block) + tommy_array_size(&array->block) * TOMMY_ARRAYBLKOF_SIZE * array->element_size; +} + diff --git a/third-party/tommyds/tommyarrayblkof.h b/third-party/tommyds/tommyarrayblkof.h new file mode 100644 index 0000000..fce6840 --- /dev/null +++ b/third-party/tommyds/tommyarrayblkof.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2013, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Dynamic array based on blocks of fixed size. + * + * This array is able to grow dynamically upon request, without any reallocation. + * + * This is very similar at ::tommy_arrayblk, but it allows to store elements of any + * size and not just pointers. + * + * Note that in this case tommy_arrayblkof_ref() returns a pointer to the element, + * that should be used for getting and setting elements in the array, + * as generic getter and setter are not available. + */ + +#ifndef __TOMMYARRAYBLKOF_H +#define __TOMMYARRAYBLKOF_H + +#include "tommytypes.h" +#include "tommyarray.h" + +#include <assert.h> /* for assert */ + +/******************************************************************************/ +/* array */ + +/** + * Elements for each block. + */ +#define TOMMY_ARRAYBLKOF_SIZE (4 * 1024) + +/** + * Array container type. + * \note Don't use internal fields directly, but access the container only using functions. + */ +typedef struct tommy_arrayblkof_struct { + tommy_array block; /**< Array of blocks. */ + tommy_size_t element_size; /**< Size of the stored element in bytes. */ + tommy_count_t count; /**< Number of initialized elements in the array. */ +} tommy_arrayblkof; + +/** + * Initializes the array. + * \param element_size Size in byte of the element to store in the array. + */ +void tommy_arrayblkof_init(tommy_arrayblkof* array, tommy_size_t element_size); + +/** + * Deinitializes the array. + */ +void tommy_arrayblkof_done(tommy_arrayblkof* array); + +/** + * Grows the size up to the specified value. + * All the new elements in the array are initialized with the 0 value. + */ +void tommy_arrayblkof_grow(tommy_arrayblkof* array, tommy_count_t size); + +/** + * Gets a reference of the element at the specified position. + * You must be sure that space for this position is already + * allocated calling tommy_arrayblkof_grow(). + */ +tommy_inline void* tommy_arrayblkof_ref(tommy_arrayblkof* array, tommy_count_t pos) +{ + unsigned char* base; + + assert(pos < array->count); + + base = tommy_cast(unsigned char*, tommy_array_get(&array->block, pos / TOMMY_ARRAYBLKOF_SIZE)); + + return base + (pos % TOMMY_ARRAYBLKOF_SIZE) * array->element_size; +} + +/** + * Gets the initialized size of the array. + */ +tommy_inline tommy_count_t tommy_arrayblkof_size(tommy_arrayblkof* array) +{ + return array->count; +} + +/** + * Gets the size of allocated memory. + */ +tommy_size_t tommy_arrayblkof_memory_usage(tommy_arrayblkof* array); + +#endif + diff --git a/third-party/tommyds/tommyarrayof.c b/third-party/tommyds/tommyarrayof.c new file mode 100644 index 0000000..80aede7 --- /dev/null +++ b/third-party/tommyds/tommyarrayof.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2013, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommyarrayof.h" + +/******************************************************************************/ +/* array */ + +void tommy_arrayof_init(tommy_arrayof* array, tommy_size_t element_size) +{ + tommy_uint_t i; + + /* fixed initial size */ + array->element_size = element_size; + array->bucket_bit = TOMMY_ARRAYOF_BIT; + array->bucket_max = 1 << array->bucket_bit; + array->bucket[0] = tommy_calloc(array->bucket_max, array->element_size); + for (i = 1; i < TOMMY_ARRAYOF_BIT; ++i) + array->bucket[i] = array->bucket[0]; + + array->count = 0; +} + +void tommy_arrayof_done(tommy_arrayof* array) +{ + tommy_uint_t i; + + tommy_free(array->bucket[0]); + for (i = TOMMY_ARRAYOF_BIT; i < array->bucket_bit; ++i) { + unsigned char* segment = tommy_cast(unsigned char*, array->bucket[i]); + tommy_free(segment + (((tommy_ptrdiff_t)1) << i) * array->element_size); + } +} + +void tommy_arrayof_grow(tommy_arrayof* array, tommy_count_t count) +{ + if (array->count >= count) + return; + array->count = count; + + while (count > array->bucket_max) { + unsigned char* segment; + + /* allocate one more segment */ + segment = tommy_cast(unsigned char*, tommy_calloc(array->bucket_max, array->element_size)); + + /* store it adjusting the offset */ + /* cast to ptrdiff_t to ensure to get a negative value */ + array->bucket[array->bucket_bit] = segment - (tommy_ptrdiff_t)array->bucket_max * array->element_size; + + ++array->bucket_bit; + array->bucket_max = 1 << array->bucket_bit; + } +} + +tommy_size_t tommy_arrayof_memory_usage(tommy_arrayof* array) +{ + return array->bucket_max * (tommy_size_t)array->element_size; +} + diff --git a/third-party/tommyds/tommyarrayof.h b/third-party/tommyds/tommyarrayof.h new file mode 100644 index 0000000..8454838 --- /dev/null +++ b/third-party/tommyds/tommyarrayof.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2013, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Dynamic array based on segments of exponential growing size. + * + * This array is able to grow dynamically upon request, without any reallocation. + * + * This is very similar at ::tommy_array, but it allows to store elements of any + * size and not just pointers. + * + * Note that in this case tommy_arrayof_ref() returns a pointer to the element, + * that should be used for getting and setting elements in the array, + * as generic getter and setter are not available. + */ + +#ifndef __TOMMYARRAYOF_H +#define __TOMMYARRAYOF_H + +#include "tommytypes.h" + +#include <assert.h> /* for assert */ + +/******************************************************************************/ +/* array */ + +/** + * Initial and minimal size of the array expressed as a power of 2. + * The initial size is 2^TOMMY_ARRAYOF_BIT. + */ +#define TOMMY_ARRAYOF_BIT 6 + +/** \internal + * Max number of elements as a power of 2. + */ +#define TOMMY_ARRAYOF_BIT_MAX 32 + +/** + * Array container type. + * \note Don't use internal fields directly, but access the container only using functions. + */ +typedef struct tommy_arrayof_struct { + void* bucket[TOMMY_ARRAYOF_BIT_MAX]; /**< Dynamic array of buckets. */ + tommy_size_t element_size; /**< Size of the stored element in bytes. */ + tommy_uint_t bucket_bit; /**< Bits used in the bit mask. */ + tommy_count_t bucket_max; /**< Number of buckets. */ + tommy_count_t count; /**< Number of initialized elements in the array. */ +} tommy_arrayof; + +/** + * Initializes the array. + * \param element_size Size in byte of the element to store in the array. + */ +void tommy_arrayof_init(tommy_arrayof* array, tommy_size_t element_size); + +/** + * Deinitializes the array. + */ +void tommy_arrayof_done(tommy_arrayof* array); + +/** + * Grows the size up to the specified value. + * All the new elements in the array are initialized with the 0 value. + */ +void tommy_arrayof_grow(tommy_arrayof* array, tommy_count_t size); + +/** + * Gets a reference of the element at the specified position. + * You must be sure that space for this position is already + * allocated calling tommy_arrayof_grow(). + */ +tommy_inline void* tommy_arrayof_ref(tommy_arrayof* array, tommy_count_t pos) +{ + unsigned char* ptr; + tommy_uint_t bsr; + + assert(pos < array->count); + + /* get the highest bit set, in case of all 0, return 0 */ + bsr = tommy_ilog2_u32(pos | 1); + + ptr = tommy_cast(unsigned char*, array->bucket[bsr]); + + return ptr + pos * array->element_size; +} + +/** + * Gets the initialized size of the array. + */ +tommy_inline tommy_count_t tommy_arrayof_size(tommy_arrayof* array) +{ + return array->count; +} + +/** + * Gets the size of allocated memory. + */ +tommy_size_t tommy_arrayof_memory_usage(tommy_arrayof* array); + +#endif + diff --git a/third-party/tommyds/tommychain.h b/third-party/tommyds/tommychain.h new file mode 100644 index 0000000..b15bc6a --- /dev/null +++ b/third-party/tommyds/tommychain.h @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Chain of nodes. + * A chain of nodes is an abstraction used to implements complex list operations + * like sorting. + * + * Do not use this directly. Use lists instead. + */ + +#ifndef __TOMMYCHAIN_H +#define __TOMMYCHAIN_H + +#include "tommytypes.h" + +/******************************************************************************/ +/* chain */ + +/** + * Chain of nodes. + * A chain of nodes is a sequence of nodes with the following properties: + * - It contains at least one node. A chains of zero nodes cannot exist. + * - The next field of the tail is of *undefined* value. + * - The prev field of the head is of *undefined* value. + * - All the other inner prev and next fields are correctly set. + */ +typedef struct tommy_chain_struct { + tommy_node* head; /**< Pointer to the head of the chain. */ + tommy_node* tail; /**< Pointer to the tail of the chain. */ +} tommy_chain; + +/** + * Splices a chain in the middle of another chain. + */ +tommy_inline void tommy_chain_splice(tommy_node* first_before, tommy_node* first_after, tommy_node* second_head, tommy_node* second_tail) +{ + /* set the prev list */ + first_after->prev = second_tail; + second_head->prev = first_before; + + /* set the next list */ + first_before->next = second_head; + second_tail->next = first_after; +} + +/** + * Concats two chains. + */ +tommy_inline void tommy_chain_concat(tommy_node* first_tail, tommy_node* second_head) +{ + /* set the prev list */ + second_head->prev = first_tail; + + /* set the next list */ + first_tail->next = second_head; +} + +/** + * Merges two chains. + */ +tommy_inline void tommy_chain_merge(tommy_chain* first, tommy_chain* second, tommy_compare_func* cmp) +{ + tommy_node* first_i = first->head; + tommy_node* second_i = second->head; + + /* merge */ + while (1) { + if (cmp(first_i->data, second_i->data) > 0) { + tommy_node* next = second_i->next; + if (first_i == first->head) { + tommy_chain_concat(second_i, first_i); + first->head = second_i; + } else { + tommy_chain_splice(first_i->prev, first_i, second_i, second_i); + } + if (second_i == second->tail) + break; + second_i = next; + } else { + if (first_i == first->tail) { + tommy_chain_concat(first_i, second_i); + first->tail = second->tail; + break; + } + first_i = first_i->next; + } + } +} + +/** + * Merges two chains managing special degenerated cases. + * It's funtionally equivalent at tommy_chain_merge() but faster with already ordered chains. + */ +tommy_inline void tommy_chain_merge_degenerated(tommy_chain* first, tommy_chain* second, tommy_compare_func* cmp) +{ + /* identify the condition first <= second */ + if (cmp(first->tail->data, second->head->data) <= 0) { + tommy_chain_concat(first->tail, second->head); + first->tail = second->tail; + return; + } + + /* identify the condition second < first */ + /* here we must be strict on comparison to keep the sort stable */ + if (cmp(second->tail->data, first->head->data) < 0) { + tommy_chain_concat(second->tail, first->head); + first->head = second->head; + return; + } + + tommy_chain_merge(first, second, cmp); +} + +/** + * Max number of elements as a power of 2. + */ +#define TOMMY_CHAIN_BIT_MAX 32 + +/** + * Sorts a chain. + * It's a stable merge sort using power of 2 buckets, with O(N*log(N)) complexity, + * similar at the one used in the SGI STL libraries and in the Linux Kernel, + * but faster on degenerated cases like already ordered lists. + * + * SGI STL stl_list.h + * http://www.sgi.com/tech/stl/stl_list.h + * + * Linux Kernel lib/list_sort.c + * http://lxr.linux.no/#linux+v2.6.36/lib/list_sort.c + */ +tommy_inline void tommy_chain_mergesort(tommy_chain* chain, tommy_compare_func* cmp) +{ + /* + * Bit buckets of chains. + * Each bucket contains 2^i nodes or it's empty. + * The chain at address TOMMY_CHAIN_BIT_MAX is an independet variable operating as "carry". + * We keep it in the same "bit" vector to avoid reports from the valgrind tool sgcheck. + */ + tommy_chain bit[TOMMY_CHAIN_BIT_MAX + 1]; + + /** + * Value stored inside the bit bucket. + * It's used to know which bucket is empty of full. + */ + tommy_count_t counter; + tommy_node* node = chain->head; + tommy_node* tail = chain->tail; + tommy_count_t mask; + tommy_count_t i; + + counter = 0; + while (1) { + tommy_node* next; + tommy_chain* last; + + /* carry bit to add */ + last = &bit[TOMMY_CHAIN_BIT_MAX]; + bit[TOMMY_CHAIN_BIT_MAX].head = node; + bit[TOMMY_CHAIN_BIT_MAX].tail = node; + next = node->next; + + /* add the bit, propagating the carry */ + i = 0; + mask = counter; + while ((mask & 1) != 0) { + tommy_chain_merge_degenerated(&bit[i], last, cmp); + mask >>= 1; + last = &bit[i]; + ++i; + } + + /* copy the carry in the first empty bit */ + bit[i] = *last; + + /* add the carry in the counter */ + ++counter; + + if (node == tail) + break; + node = next; + } + + /* merge the buckets */ + i = tommy_ctz_u32(counter); + mask = counter >> i; + while (mask != 1) { + mask >>= 1; + if (mask & 1) + tommy_chain_merge_degenerated(&bit[i + 1], &bit[i], cmp); + else + bit[i + 1] = bit[i]; + ++i; + } + + *chain = bit[i]; +} + +#endif + diff --git a/third-party/tommyds/tommyhash.c b/third-party/tommyds/tommyhash.c new file mode 100644 index 0000000..cc7495d --- /dev/null +++ b/third-party/tommyds/tommyhash.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommyhash.h" + +/******************************************************************************/ +/* hash */ + +tommy_inline tommy_uint32_t tommy_le_uint32_read(const void* ptr) +{ + /* allow unaligned read on Intel x86 and x86_64 platforms */ +#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__x86_64__) || defined(_M_X64) + /* defines from http://predef.sourceforge.net/ */ + return *(const tommy_uint32_t*)ptr; +#else + const unsigned char* ptr8 = tommy_cast(const unsigned char*, ptr); + return ptr8[0] + ((tommy_uint32_t)ptr8[1] << 8) + ((tommy_uint32_t)ptr8[2] << 16) + ((tommy_uint32_t)ptr8[3] << 24); +#endif +} + +#define tommy_rot(x, k) \ + (((x) << (k)) | ((x) >> (32 - (k)))) + +#define tommy_mix(a, b, c) \ + do { \ + a -= c; a ^= tommy_rot(c, 4); c += b; \ + b -= a; b ^= tommy_rot(a, 6); a += c; \ + c -= b; c ^= tommy_rot(b, 8); b += a; \ + a -= c; a ^= tommy_rot(c, 16); c += b; \ + b -= a; b ^= tommy_rot(a, 19); a += c; \ + c -= b; c ^= tommy_rot(b, 4); b += a; \ + } while (0) + +#define tommy_final(a, b, c) \ + do { \ + c ^= b; c -= tommy_rot(b, 14); \ + a ^= c; a -= tommy_rot(c, 11); \ + b ^= a; b -= tommy_rot(a, 25); \ + c ^= b; c -= tommy_rot(b, 16); \ + a ^= c; a -= tommy_rot(c, 4); \ + b ^= a; b -= tommy_rot(a, 14); \ + c ^= b; c -= tommy_rot(b, 24); \ + } while (0) + +tommy_uint32_t tommy_hash_u32(tommy_uint32_t init_val, const void* void_key, tommy_size_t key_len) +{ + const unsigned char* key = tommy_cast(const unsigned char*, void_key); + tommy_uint32_t a, b, c; + + a = b = c = 0xdeadbeef + ((tommy_uint32_t)key_len) + init_val; + + while (key_len > 12) { + a += tommy_le_uint32_read(key + 0); + b += tommy_le_uint32_read(key + 4); + c += tommy_le_uint32_read(key + 8); + + tommy_mix(a, b, c); + + key_len -= 12; + key += 12; + } + + switch (key_len) { + case 0 : + return c; /* used only when called with a zero length */ + case 12 : + c += tommy_le_uint32_read(key + 8); + b += tommy_le_uint32_read(key + 4); + a += tommy_le_uint32_read(key + 0); + break; + case 11 : c += ((tommy_uint32_t)key[10]) << 16; /* fallthrough */ + case 10 : c += ((tommy_uint32_t)key[9]) << 8; /* fallthrough */ + case 9 : c += key[8]; /* fallthrough */ + case 8 : + b += tommy_le_uint32_read(key + 4); + a += tommy_le_uint32_read(key + 0); + break; + case 7 : b += ((tommy_uint32_t)key[6]) << 16; /* fallthrough */ + case 6 : b += ((tommy_uint32_t)key[5]) << 8; /* fallthrough */ + case 5 : b += key[4]; /* fallthrough */ + case 4 : + a += tommy_le_uint32_read(key + 0); + break; + case 3 : a += ((tommy_uint32_t)key[2]) << 16; /* fallthrough */ + case 2 : a += ((tommy_uint32_t)key[1]) << 8; /* fallthrough */ + case 1 : a += key[0]; /* fallthrough */ + } + + tommy_final(a, b, c); + + return c; +} + +tommy_uint64_t tommy_hash_u64(tommy_uint64_t init_val, const void* void_key, tommy_size_t key_len) +{ + const unsigned char* key = tommy_cast(const unsigned char*, void_key); + tommy_uint32_t a, b, c; + + a = b = c = 0xdeadbeef + ((tommy_uint32_t)key_len) + (init_val & 0xffffffff); + c += init_val >> 32; + + while (key_len > 12) { + a += tommy_le_uint32_read(key + 0); + b += tommy_le_uint32_read(key + 4); + c += tommy_le_uint32_read(key + 8); + + tommy_mix(a, b, c); + + key_len -= 12; + key += 12; + } + + switch (key_len) { + case 0 : + return c + ((tommy_uint64_t)b << 32); /* used only when called with a zero length */ + case 12 : + c += tommy_le_uint32_read(key + 8); + b += tommy_le_uint32_read(key + 4); + a += tommy_le_uint32_read(key + 0); + break; + case 11 : c += ((tommy_uint32_t)key[10]) << 16; /* fallthrough */ + case 10 : c += ((tommy_uint32_t)key[9]) << 8; /* fallthrough */ + case 9 : c += key[8]; /* fallthrough */ + case 8 : + b += tommy_le_uint32_read(key + 4); + a += tommy_le_uint32_read(key + 0); + break; + case 7 : b += ((tommy_uint32_t)key[6]) << 16; /* fallthrough */ + case 6 : b += ((tommy_uint32_t)key[5]) << 8; /* fallthrough */ + case 5 : b += key[4]; /* fallthrough */ + case 4 : + a += tommy_le_uint32_read(key + 0); + break; + case 3 : a += ((tommy_uint32_t)key[2]) << 16; /* fallthrough */ + case 2 : a += ((tommy_uint32_t)key[1]) << 8; /* fallthrough */ + case 1 : a += key[0]; /* fallthrough */ + } + + tommy_final(a, b, c); + + return c + ((tommy_uint64_t)b << 32); +} + +tommy_uint32_t tommy_strhash_u32(tommy_uint64_t init_val, const void* void_key) +{ + const unsigned char* key = tommy_cast(const unsigned char*, void_key); + tommy_uint32_t a, b, c; + tommy_uint32_t m[3] = { 0xff, 0xff00, 0xff0000 }; + + a = b = c = 0xdeadbeef + init_val; + /* this is different than original lookup3 and the result won't match */ + + while (1) { + tommy_uint32_t v = tommy_le_uint32_read(key); + + if (tommy_haszero_u32(v)) { + if (v & m[0]) { + a += v & m[0]; + if (v & m[1]) { + a += v & m[1]; + if (v & m[2]) + a += v & m[2]; + } + } + + break; + } + + a += v; + + v = tommy_le_uint32_read(key + 4); + + if (tommy_haszero_u32(v)) { + if (v & m[0]) { + b += v & m[0]; + if (v & m[1]) { + b += v & m[1]; + if (v & m[2]) + b += v & m[2]; + } + } + + break; + } + + b += v; + + v = tommy_le_uint32_read(key + 8); + + if (tommy_haszero_u32(v)) { + if (v & m[0]) { + c += v & m[0]; + if (v & m[1]) { + c += v & m[1]; + if (v & m[2]) + c += v & m[2]; + } + } + + break; + } + + c += v; + + tommy_mix(a, b, c); + + key += 12; + } + + /* for lengths that are multiplers of 12 we already have called mix */ + /* this is different than the original lookup3 and the result won't match */ + + tommy_final(a, b, c); + + return c; +} + diff --git a/third-party/tommyds/tommyhash.h b/third-party/tommyds/tommyhash.h new file mode 100644 index 0000000..738e33d --- /dev/null +++ b/third-party/tommyds/tommyhash.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Hash functions for the use with ::tommy_hashtable, ::tommy_hashdyn and ::tommy_hashlin. + */ + +#ifndef __TOMMYHASH_H +#define __TOMMYHASH_H + +#include "tommytypes.h" + +/******************************************************************************/ +/* hash */ + +/** + * Hash type used in hashtables. + */ +typedef tommy_key_t tommy_hash_t; + +/** + * Hash function with a 32 bits result. + * Implementation of the Robert Jenkins "lookup3" hash 32 bits version, + * from http://www.burtleburtle.net/bob/hash/doobs.html, function hashlittle(). + * + * This hash is designed to provide a good overall performance in all platforms, + * including 32 bits. If you target only 64 bits, you can use faster hashes, + * like SpookyHash or FarmHash. + * + * \param init_val Initialization value. + * Using a different initialization value, you can generate a completely different set of hash values. + * Use 0 if not relevant. + * \param void_key Pointer to the data to hash. + * \param key_len Size of the data to hash. + * \note + * This function is endianess independent. + * \return The hash value of 32 bits. + */ +tommy_uint32_t tommy_hash_u32(tommy_uint32_t init_val, const void* void_key, tommy_size_t key_len); + +/** + * Hash function with a 64 bits result. + * Implementation of the Robert Jenkins "lookup3" hash 64 bits versions, + * from http://www.burtleburtle.net/bob/hash/doobs.html, function hashlittle2(). + * + * This hash is designed to provide a good overall performance in all platforms, + * including 32 bits. If you target only 64 bits, you can use faster hashes, + * like SpookyHash or FarmHash. + * + * \param init_val Initialization value. + * Using a different initialization value, you can generate a completely different set of hash values. + * Use 0 if not relevant. + * \param void_key Pointer to the data to hash. + * \param key_len Size of the data to hash. + * \note + * This function is endianess independent. + * \return The hash value of 64 bits. + */ +tommy_uint64_t tommy_hash_u64(tommy_uint64_t init_val, const void* void_key, tommy_size_t key_len); + +/** + * String hash function with a 32 bits result. + * Implementation is based on the the Robert Jenkins "lookup3" hash 32 bits version, + * from http://www.burtleburtle.net/bob/hash/doobs.html, function hashlittle(). + * + * This hash is designed to handle strings with an unknown length. If you + * know the string length, the other hash functions are surely faster. + * + * \param init_val Initialization value. + * Using a different initialization value, you can generate a completely different set of hash values. + * Use 0 if not relevant. + * \param void_key Pointer to the string to hash. It has to be 0 terminated. + * \note + * This function is endianess independent. + * \return The hash value of 32 bits. + */ +tommy_uint32_t tommy_strhash_u32(tommy_uint64_t init_val, const void* void_key); + +/** + * Integer reversible hash function for 32 bits. + * Implementation of the Robert Jenkins "4-byte Integer Hashing", + * from http://burtleburtle.net/bob/hash/integer.html + */ +tommy_inline tommy_uint32_t tommy_inthash_u32(tommy_uint32_t key) +{ + key -= key << 6; + key ^= key >> 17; + key -= key << 9; + key ^= key << 4; + key -= key << 3; + key ^= key << 10; + key ^= key >> 15; + + return key; +} + +/** + * Integer reversible hash function for 64 bits. + * Implementation of the Thomas Wang "Integer Hash Function", + * from http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm + */ +tommy_inline tommy_uint64_t tommy_inthash_u64(tommy_uint64_t key) +{ + key = ~key + (key << 21); + key = key ^ (key >> 24); + key = key + (key << 3) + (key << 8); + key = key ^ (key >> 14); + key = key + (key << 2) + (key << 4); + key = key ^ (key >> 28); + key = key + (key << 31); + + return key; +} + +#endif + diff --git a/third-party/tommyds/tommyhashdyn.c b/third-party/tommyds/tommyhashdyn.c new file mode 100644 index 0000000..02ef404 --- /dev/null +++ b/third-party/tommyds/tommyhashdyn.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommyhashdyn.h" +#include "tommylist.h" + +/******************************************************************************/ +/* hashdyn */ + +void tommy_hashdyn_init(tommy_hashdyn* hashdyn) +{ + /* fixed initial size */ + hashdyn->bucket_bit = TOMMY_HASHDYN_BIT; + hashdyn->bucket_max = 1 << hashdyn->bucket_bit; + hashdyn->bucket_mask = hashdyn->bucket_max - 1; + hashdyn->bucket = tommy_cast(tommy_hashdyn_node**, tommy_calloc(hashdyn->bucket_max, sizeof(tommy_hashdyn_node*))); + + hashdyn->count = 0; +} + +void tommy_hashdyn_done(tommy_hashdyn* hashdyn) +{ + tommy_free(hashdyn->bucket); +} + +/** + * Resize the bucket vector. + */ +static void tommy_hashdyn_resize(tommy_hashdyn* hashdyn, tommy_count_t new_bucket_bit) +{ + tommy_count_t bucket_bit; + tommy_count_t bucket_max; + tommy_count_t new_bucket_max; + tommy_count_t new_bucket_mask; + tommy_hashdyn_node** new_bucket; + + bucket_bit = hashdyn->bucket_bit; + bucket_max = hashdyn->bucket_max; + + new_bucket_max = 1 << new_bucket_bit; + new_bucket_mask = new_bucket_max - 1; + + /* allocate the new vector using malloc() and not calloc() */ + /* because data is fully initialized in the update process */ + new_bucket = tommy_cast(tommy_hashdyn_node**, tommy_malloc(new_bucket_max * sizeof(tommy_hashdyn_node*))); + + /* reinsert all the elements */ + if (new_bucket_bit > bucket_bit) { + tommy_count_t i; + + /* grow */ + for (i = 0; i < bucket_max; ++i) { + tommy_hashdyn_node* j; + + /* setup the new two buckets */ + new_bucket[i] = 0; + new_bucket[i + bucket_max] = 0; + + /* reinsert the bucket */ + j = hashdyn->bucket[i]; + while (j) { + tommy_hashdyn_node* j_next = j->next; + tommy_count_t pos = j->key & new_bucket_mask; + if (new_bucket[pos]) + tommy_list_insert_tail_not_empty(new_bucket[pos], j); + else + tommy_list_insert_first(&new_bucket[pos], j); + j = j_next; + } + } + } else { + tommy_count_t i; + + /* shrink */ + for (i = 0; i < new_bucket_max; ++i) { + /* setup the new bucket with the lower bucket*/ + new_bucket[i] = hashdyn->bucket[i]; + + /* concat the upper bucket */ + tommy_list_concat(&new_bucket[i], &hashdyn->bucket[i + new_bucket_max]); + } + } + + tommy_free(hashdyn->bucket); + + /* setup */ + hashdyn->bucket_bit = new_bucket_bit; + hashdyn->bucket_max = new_bucket_max; + hashdyn->bucket_mask = new_bucket_mask; + hashdyn->bucket = new_bucket; +} + +/** + * Grow. + */ +tommy_inline void hashdyn_grow_step(tommy_hashdyn* hashdyn) +{ + /* grow if more than 50% full */ + if (hashdyn->count >= hashdyn->bucket_max / 2) + tommy_hashdyn_resize(hashdyn, hashdyn->bucket_bit + 1); +} + +/** + * Shrink. + */ +tommy_inline void hashdyn_shrink_step(tommy_hashdyn* hashdyn) +{ + /* shrink if less than 12.5% full */ + if (hashdyn->count <= hashdyn->bucket_max / 8 && hashdyn->bucket_bit > TOMMY_HASHDYN_BIT) + tommy_hashdyn_resize(hashdyn, hashdyn->bucket_bit - 1); +} + +void tommy_hashdyn_insert(tommy_hashdyn* hashdyn, tommy_hashdyn_node* node, void* data, tommy_hash_t hash) +{ + tommy_count_t pos = hash & hashdyn->bucket_mask; + + tommy_list_insert_tail(&hashdyn->bucket[pos], node, data); + + node->key = hash; + + ++hashdyn->count; + + hashdyn_grow_step(hashdyn); +} + +void* tommy_hashdyn_remove_existing(tommy_hashdyn* hashdyn, tommy_hashdyn_node* node) +{ + tommy_count_t pos = node->key & hashdyn->bucket_mask; + + tommy_list_remove_existing(&hashdyn->bucket[pos], node); + + --hashdyn->count; + + hashdyn_shrink_step(hashdyn); + + return node->data; +} + +void* tommy_hashdyn_remove(tommy_hashdyn* hashdyn, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash) +{ + tommy_count_t pos = hash & hashdyn->bucket_mask; + tommy_hashdyn_node* node = hashdyn->bucket[pos]; + + while (node) { + /* we first check if the hash matches, as in the same bucket we may have multiples hash values */ + if (node->key == hash && cmp(cmp_arg, node->data) == 0) { + tommy_list_remove_existing(&hashdyn->bucket[pos], node); + + --hashdyn->count; + + hashdyn_shrink_step(hashdyn); + + return node->data; + } + node = node->next; + } + + return 0; +} + +void tommy_hashdyn_foreach(tommy_hashdyn* hashdyn, tommy_foreach_func* func) +{ + tommy_count_t bucket_max = hashdyn->bucket_max; + tommy_hashdyn_node** bucket = hashdyn->bucket; + tommy_count_t pos; + + for (pos = 0; pos < bucket_max; ++pos) { + tommy_hashdyn_node* node = bucket[pos]; + + while (node) { + void* data = node->data; + node = node->next; + func(data); + } + } +} + +void tommy_hashdyn_foreach_arg(tommy_hashdyn* hashdyn, tommy_foreach_arg_func* func, void* arg) +{ + tommy_count_t bucket_max = hashdyn->bucket_max; + tommy_hashdyn_node** bucket = hashdyn->bucket; + tommy_count_t pos; + + for (pos = 0; pos < bucket_max; ++pos) { + tommy_hashdyn_node* node = bucket[pos]; + + while (node) { + void* data = node->data; + node = node->next; + func(arg, data); + } + } +} + +tommy_size_t tommy_hashdyn_memory_usage(tommy_hashdyn* hashdyn) +{ + return hashdyn->bucket_max * (tommy_size_t)sizeof(hashdyn->bucket[0]) + + tommy_hashdyn_count(hashdyn) * (tommy_size_t)sizeof(tommy_hashdyn_node); +} + diff --git a/third-party/tommyds/tommyhashdyn.h b/third-party/tommyds/tommyhashdyn.h new file mode 100644 index 0000000..ed4a607 --- /dev/null +++ b/third-party/tommyds/tommyhashdyn.h @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Dynamic chained hashtable. + * + * This hashtable resizes dynamically. It starts with the minimal size of 16 buckets, it doubles + * the size then it reaches a load factor greater than 0.5 and it halves the size with a load + * factor lower than 0.125. + * + * All the elements are reallocated in a single resize operation done inside + * tommy_hashdyn_insert() or tommy_hashdyn_remove(). + * + * Note that the resize operation takes approximatively 100 [ms] with 1 million of elements, + * and 1 [second] with 10 millions. This could be a problem in real-time applications. + * + * The resize also fragment the heap, as it involves allocating a double-sized table, copy elements, + * and deallocating the older table. Leaving a big hole in the heap. + * + * The ::tommy_hashlin hashtable fixes both problems. + * + * To initialize the hashtable you have to call tommy_hashdyn_init(). + * + * \code + * tommy_hashslin hashdyn; + * + * tommy_hashdyn_init(&hashdyn); + * \endcode + * + * To insert elements in the hashtable you have to call tommy_hashdyn_insert() for + * each element. + * In the insertion call you have to specify the address of the node, the + * address of the object, and the hash value of the key to use. + * The address of the object is used to initialize the tommy_node::data field + * of the node, and the hash to initialize the tommy_node::key field. + * + * \code + * struct object { + * int value; + * // other fields + * tommy_node node; + * }; + * + * struct object* obj = malloc(sizeof(struct object)); // creates the object + * + * obj->value = ...; // initializes the object + * + * tommy_hashdyn_insert(&hashdyn, &obj->node, obj, tommy_inthash_u32(obj->value)); // inserts the object + * \endcode + * + * To find and element in the hashtable you have to call tommy_hashtable_search() + * providing a comparison function, its argument, and the hash of the key to search. + * + * \code + * int compare(const void* arg, const void* obj) + * { + * return *(const int*)arg != ((const struct object*)obj)->value; + * } + * + * int value_to_find = 1; + * struct object* obj = tommy_hashdyn_search(&hashdyn, compare, &value_to_find, tommy_inthash_u32(value_to_find)); + * if (!obj) { + * // not found + * } else { + * // found + * } + * \endcode + * + * To iterate over all the elements in the hashtable with the same key, you have to + * use tommy_hashdyn_bucket() and follow the tommy_node::next pointer until NULL. + * You have also to check explicitely for the key, as the bucket may contains + * different keys. + * + * \code + * int value_to_find = 1; + * tommy_node* i = tommy_hashdyn_bucket(&hashdyn, tommy_inthash_u32(value_to_find)); + * while (i) { + * struct object* obj = i->data; // gets the object pointer + * + * if (obj->value == value_to_find) { + * printf("%d\n", obj->value); // process the object + * } + * + * i = i->next; // goes to the next element + * } + * \endcode + * + * To remove an element from the hashtable you have to call tommy_hashdyn_remove() + * providing a comparison function, its argument, and the hash of the key to search + * and remove. + * + * \code + * struct object* obj = tommy_hashdyn_remove(&hashdyn, compare, &value_to_remove, tommy_inthash_u32(value_to_remove)); + * if (obj) { + * free(obj); // frees the object allocated memory + * } + * \endcode + * + * To destroy the hashtable you have to remove all the elements, and deinitialize + * the hashtable calling tommy_hashdyn_done(). + * + * \code + * tommy_hashdyn_done(&hashdyn); + * \endcode + * + * If you need to iterate over all the elements in the hashtable, you can use + * tommy_hashdyn_foreach() or tommy_hashdyn_foreach_arg(). + * If you need a more precise control with a real iteration, you have to insert + * all the elements also in a ::tommy_list, and use the list to iterate. + * See the \ref multiindex example for more detail. + */ + +#ifndef __TOMMYHASHDYN_H +#define __TOMMYHASHDYN_H + +#include "tommyhash.h" + +/******************************************************************************/ +/* hashdyn */ + +/** \internal + * Initial and minimal size of the hashtable expressed as a power of 2. + * The initial size is 2^TOMMY_HASHDYN_BIT. + */ +#define TOMMY_HASHDYN_BIT 4 + +/** + * Hashtable node. + * This is the node that you have to include inside your objects. + */ +typedef tommy_node tommy_hashdyn_node; + +/** + * Hashtable container type. + * \note Don't use internal fields directly, but access the container only using functions. + */ +typedef struct tommy_hashdyn_struct { + tommy_hashdyn_node** bucket; /**< Hash buckets. One list for each hash modulus. */ + tommy_uint_t bucket_bit; /**< Bits used in the bit mask. */ + tommy_count_t bucket_max; /**< Number of buckets. */ + tommy_count_t bucket_mask; /**< Bit mask to access the buckets. */ + tommy_count_t count; /**< Number of elements. */ +} tommy_hashdyn; + +/** + * Initializes the hashtable. + */ +void tommy_hashdyn_init(tommy_hashdyn* hashdyn); + +/** + * Deinitializes the hashtable. + * + * You can call this function with elements still contained, + * but such elements are not going to be freed by this call. + */ +void tommy_hashdyn_done(tommy_hashdyn* hashdyn); + +/** + * Inserts an element in the hashtable. + */ +void tommy_hashdyn_insert(tommy_hashdyn* hashdyn, tommy_hashdyn_node* node, void* data, tommy_hash_t hash); + +/** + * Searches and removes an element from the hashtable. + * You have to provide a compare function and the hash of the element you want to remove. + * If the element is not found, 0 is returned. + * If more equal elements are present, the first one is removed. + * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one. + * The function should return 0 for equal elements, anything other for different elements. + * \param cmp_arg Compare argument passed as first argument of the compare function. + * \param hash Hash of the element to find and remove. + * \return The removed element, or 0 if not found. + */ +void* tommy_hashdyn_remove(tommy_hashdyn* hashdyn, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash); + +/** + * Gets the bucket of the specified hash. + * The bucket is guaranteed to contain ALL the elements with the specified hash, + * but it can contain also others. + * You can access elements in the bucket following the ::next pointer until 0. + * \param hash Hash of the element to find. + * \return The head of the bucket, or 0 if empty. + */ +tommy_inline tommy_hashdyn_node* tommy_hashdyn_bucket(tommy_hashdyn* hashdyn, tommy_hash_t hash) +{ + return hashdyn->bucket[hash & hashdyn->bucket_mask]; +} + +/** + * Searches an element in the hashtable. + * You have to provide a compare function and the hash of the element you want to find. + * If more equal elements are present, the first one is returned. + * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one. + * The function should return 0 for equal elements, anything other for different elements. + * \param cmp_arg Compare argument passed as first argument of the compare function. + * \param hash Hash of the element to find. + * \return The first element found, or 0 if none. + */ +tommy_inline void* tommy_hashdyn_search(tommy_hashdyn* hashdyn, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash) +{ + tommy_hashdyn_node* i = tommy_hashdyn_bucket(hashdyn, hash); + + while (i) { + /* we first check if the hash matches, as in the same bucket we may have multiples hash values */ + if (i->key == hash && cmp(cmp_arg, i->data) == 0) + return i->data; + i = i->next; + } + return 0; +} + +/** + * Removes an element from the hashtable. + * You must already have the address of the element to remove. + * \return The tommy_node::data field of the node removed. + */ +void* tommy_hashdyn_remove_existing(tommy_hashdyn* hashdyn, tommy_hashdyn_node* node); + +/** + * Calls the specified function for each element in the hashtable. + * + * You cannot add or remove elements from the inside of the callback, + * but can use it to deallocate them. + * + * \code + * tommy_hashdyn hashdyn; + * + * // initializes the hashtable + * tommy_hashdyn_init(&hashdyn); + * + * ... + * + * // creates an object + * struct object* obj = malloc(sizeof(struct object)); + * + * ... + * + * // insert it in the hashtable + * tommy_hashdyn_insert(&hashdyn, &obj->node, obj, tommy_inthash_u32(obj->value)); + * + * ... + * + * // deallocates all the objects iterating the hashtable + * tommy_hashdyn_foreach(&hashdyn, free); + * + * // deallocates the hashtable + * tommy_hashdyn_done(&hashdyn); + * \endcode + */ +void tommy_hashdyn_foreach(tommy_hashdyn* hashdyn, tommy_foreach_func* func); + +/** + * Calls the specified function with an argument for each element in the hashtable. + */ +void tommy_hashdyn_foreach_arg(tommy_hashdyn* hashdyn, tommy_foreach_arg_func* func, void* arg); + +/** + * Gets the number of elements. + */ +tommy_inline tommy_count_t tommy_hashdyn_count(tommy_hashdyn* hashdyn) +{ + return hashdyn->count; +} + +/** + * Gets the size of allocated memory. + * It includes the size of the ::tommy_hashdyn_node of the stored elements. + */ +tommy_size_t tommy_hashdyn_memory_usage(tommy_hashdyn* hashdyn); + +#endif + diff --git a/third-party/tommyds/tommyhashlin.c b/third-party/tommyds/tommyhashlin.c new file mode 100644 index 0000000..bcf7d98 --- /dev/null +++ b/third-party/tommyds/tommyhashlin.c @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommyhashlin.h" +#include "tommylist.h" + +#include <assert.h> /* for assert */ + +/******************************************************************************/ +/* hashlin */ + +/** + * Reallocation states. + */ +#define TOMMY_HASHLIN_STATE_STABLE 0 +#define TOMMY_HASHLIN_STATE_GROW 1 +#define TOMMY_HASHLIN_STATE_SHRINK 2 + +/** + * Set the hashtable in stable state. + */ +tommy_inline void tommy_hashlin_stable(tommy_hashlin* hashlin) +{ + hashlin->state = TOMMY_HASHLIN_STATE_STABLE; + + /* setup low_mask/max/split to allow tommy_hashlin_bucket_ref() */ + /* and tommy_hashlin_foreach() to work regardless we are in stable state */ + hashlin->low_max = hashlin->bucket_max; + hashlin->low_mask = hashlin->bucket_mask; + hashlin->split = 0; +} + +void tommy_hashlin_init(tommy_hashlin* hashlin) +{ + tommy_uint_t i; + + /* fixed initial size */ + hashlin->bucket_bit = TOMMY_HASHLIN_BIT; + hashlin->bucket_max = 1 << hashlin->bucket_bit; + hashlin->bucket_mask = hashlin->bucket_max - 1; + hashlin->bucket[0] = tommy_cast(tommy_hashlin_node**, tommy_calloc(hashlin->bucket_max, sizeof(tommy_hashlin_node*))); + for (i = 1; i < TOMMY_HASHLIN_BIT; ++i) + hashlin->bucket[i] = hashlin->bucket[0]; + + /* stable state */ + tommy_hashlin_stable(hashlin); + + hashlin->count = 0; +} + +void tommy_hashlin_done(tommy_hashlin* hashlin) +{ + tommy_uint_t i; + + tommy_free(hashlin->bucket[0]); + for (i = TOMMY_HASHLIN_BIT; i < hashlin->bucket_bit; ++i) { + tommy_hashlin_node** segment = hashlin->bucket[i]; + tommy_free(&segment[((tommy_ptrdiff_t)1) << i]); + } +} + +/** + * Grow one step. + */ +tommy_inline void hashlin_grow_step(tommy_hashlin* hashlin) +{ + /* grow if more than 50% full */ + if (hashlin->state != TOMMY_HASHLIN_STATE_GROW + && hashlin->count > hashlin->bucket_max / 2 + ) { + /* if we are stable, setup a new grow state */ + /* otherwise continue with the already setup shrink one */ + /* but in backward direction */ + if (hashlin->state == TOMMY_HASHLIN_STATE_STABLE) { + tommy_hashlin_node** segment; + + /* set the lower size */ + hashlin->low_max = hashlin->bucket_max; + hashlin->low_mask = hashlin->bucket_mask; + + /* allocate the new vector using malloc() and not calloc() */ + /* because data is fully initialized in the split process */ + segment = tommy_cast(tommy_hashlin_node**, tommy_malloc(hashlin->low_max * sizeof(tommy_hashlin_node*))); + + /* store it adjusting the offset */ + /* cast to ptrdiff_t to ensure to get a negative value */ + hashlin->bucket[hashlin->bucket_bit] = &segment[-(tommy_ptrdiff_t)hashlin->low_max]; + + /* grow the hash size */ + ++hashlin->bucket_bit; + hashlin->bucket_max = 1 << hashlin->bucket_bit; + hashlin->bucket_mask = hashlin->bucket_max - 1; + + /* start from the beginning going forward */ + hashlin->split = 0; + } + + /* grow state */ + hashlin->state = TOMMY_HASHLIN_STATE_GROW; + } + + /* if we are growing */ + if (hashlin->state == TOMMY_HASHLIN_STATE_GROW) { + /* compute the split target required to finish the reallocation before the next resize */ + tommy_count_t split_target = 2 * hashlin->count; + + /* reallocate buckets until the split target */ + while (hashlin->split + hashlin->low_max < split_target) { + tommy_hashlin_node** split[2]; + tommy_hashlin_node* j; + tommy_count_t mask; + + /* get the low bucket */ + split[0] = tommy_hashlin_pos(hashlin, hashlin->split); + + /* get the high bucket */ + split[1] = tommy_hashlin_pos(hashlin, hashlin->split + hashlin->low_max); + + /* save the low bucket */ + j = *split[0]; + + /* reinitialize the buckets */ + *split[0] = 0; + *split[1] = 0; + + /* the bit used to identify the bucket */ + mask = hashlin->low_max; + + /* flush the bucket */ + while (j) { + tommy_hashlin_node* j_next = j->next; + tommy_count_t pos = (j->key & mask) != 0; + if (*split[pos]) + tommy_list_insert_tail_not_empty(*split[pos], j); + else + tommy_list_insert_first(split[pos], j); + j = j_next; + } + + /* go forward */ + ++hashlin->split; + + /* if we have finished, change the state */ + if (hashlin->split == hashlin->low_max) { + /* go in stable mode */ + tommy_hashlin_stable(hashlin); + break; + } + } + } +} + +/** + * Shrink one step. + */ +tommy_inline void hashlin_shrink_step(tommy_hashlin* hashlin) +{ + /* shrink if less than 12.5% full */ + if (hashlin->state != TOMMY_HASHLIN_STATE_SHRINK + && hashlin->count < hashlin->bucket_max / 8 + ) { + /* avoid to shrink the first bucket */ + if (hashlin->bucket_bit > TOMMY_HASHLIN_BIT) { + /* if we are stable, setup a new shrink state */ + /* otherwise continue with the already setup grow one */ + /* but in backward direction */ + if (hashlin->state == TOMMY_HASHLIN_STATE_STABLE) { + /* set the lower size */ + hashlin->low_max = hashlin->bucket_max / 2; + hashlin->low_mask = hashlin->bucket_mask / 2; + + /* start from the half going backward */ + hashlin->split = hashlin->low_max; + } + + /* start reallocation */ + hashlin->state = TOMMY_HASHLIN_STATE_SHRINK; + } + } + + /* if we are shrinking */ + if (hashlin->state == TOMMY_HASHLIN_STATE_SHRINK) { + /* compute the split target required to finish the reallocation before the next resize */ + tommy_count_t split_target = 8 * hashlin->count; + + /* reallocate buckets until the split target */ + while (hashlin->split + hashlin->low_max > split_target) { + tommy_hashlin_node** split[2]; + + /* go backward position */ + --hashlin->split; + + /* get the low bucket */ + split[0] = tommy_hashlin_pos(hashlin, hashlin->split); + + /* get the high bucket */ + split[1] = tommy_hashlin_pos(hashlin, hashlin->split + hashlin->low_max); + + /* concat the high bucket into the low one */ + tommy_list_concat(split[0], split[1]); + + /* if we have finished, clean up and change the state */ + if (hashlin->split == 0) { + tommy_hashlin_node** segment; + + /* shrink the hash size */ + --hashlin->bucket_bit; + hashlin->bucket_max = 1 << hashlin->bucket_bit; + hashlin->bucket_mask = hashlin->bucket_max - 1; + + /* free the last segment */ + segment = hashlin->bucket[hashlin->bucket_bit]; + tommy_free(&segment[((tommy_ptrdiff_t)1) << hashlin->bucket_bit]); + + /* go in stable mode */ + tommy_hashlin_stable(hashlin); + break; + } + } + } +} + +void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void* data, tommy_hash_t hash) +{ + tommy_list_insert_tail(tommy_hashlin_bucket_ref(hashlin, hash), node, data); + + node->key = hash; + + ++hashlin->count; + + hashlin_grow_step(hashlin); +} + +void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* node) +{ + tommy_list_remove_existing(tommy_hashlin_bucket_ref(hashlin, node->key), node); + + --hashlin->count; + + hashlin_shrink_step(hashlin); + + return node->data; +} + +void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash) +{ + tommy_hashlin_node** let_ptr = tommy_hashlin_bucket_ref(hashlin, hash); + tommy_hashlin_node* node = *let_ptr; + + while (node) { + /* we first check if the hash matches, as in the same bucket we may have multiples hash values */ + if (node->key == hash && cmp(cmp_arg, node->data) == 0) { + tommy_list_remove_existing(let_ptr, node); + + --hashlin->count; + + hashlin_shrink_step(hashlin); + + return node->data; + } + node = node->next; + } + + return 0; +} + +void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func) +{ + tommy_count_t bucket_max; + tommy_count_t pos; + + /* number of valid buckets */ + bucket_max = hashlin->low_max + hashlin->split; + + for (pos = 0; pos < bucket_max; ++pos) { + tommy_hashlin_node* node = *tommy_hashlin_pos(hashlin, pos); + + while (node) { + void* data = node->data; + node = node->next; + func(data); + } + } +} + +void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* func, void* arg) +{ + tommy_count_t bucket_max; + tommy_count_t pos; + + /* number of valid buckets */ + bucket_max = hashlin->low_max + hashlin->split; + + for (pos = 0; pos < bucket_max; ++pos) { + tommy_hashlin_node* node = *tommy_hashlin_pos(hashlin, pos); + + while (node) { + void* data = node->data; + node = node->next; + func(arg, data); + } + } +} + +tommy_size_t tommy_hashlin_memory_usage(tommy_hashlin* hashlin) +{ + return hashlin->bucket_max * (tommy_size_t)sizeof(hashlin->bucket[0][0]) + + hashlin->count * (tommy_size_t)sizeof(tommy_hashlin_node); +} + diff --git a/third-party/tommyds/tommyhashlin.h b/third-party/tommyds/tommyhashlin.h new file mode 100644 index 0000000..eaf59e6 --- /dev/null +++ b/third-party/tommyds/tommyhashlin.h @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Linear chained hashtable. + * + * This hashtable resizes dynamically and progressively using a variation of the + * linear hashing algorithm described in http://en.wikipedia.org/wiki/Linear_hashing + * + * It starts with the minimal size of 16 buckets, it doubles the size then it + * reaches a load factor greater than 0.5 and it halves the size with a load + * factor lower than 0.125. + * + * The progressive resize is good for real-time and interactive applications + * as it makes insert and delete operations taking always the same time. + * + * For resizing it's used a dynamic array that supports access to not contigous + * segments. + * In this way we only allocate additional table segments on the heap, without + * freeing the previous table, and then not increasing the heap fragmentation. + * + * The resize takes place inside tommy_hashlin_insert() and tommy_hashlin_remove(). + * No resize is done in the tommy_hashlin_search() operation. + * + * To initialize the hashtable you have to call tommy_hashlin_init(). + * + * \code + * tommy_hashslin hashlin; + * + * tommy_hashlin_init(&hashlin); + * \endcode + * + * To insert elements in the hashtable you have to call tommy_hashlin_insert() for + * each element. + * In the insertion call you have to specify the address of the node, the + * address of the object, and the hash value of the key to use. + * The address of the object is used to initialize the tommy_node::data field + * of the node, and the hash to initialize the tommy_node::key field. + * + * \code + * struct object { + * int value; + * // other fields + * tommy_node node; + * }; + * + * struct object* obj = malloc(sizeof(struct object)); // creates the object + * + * obj->value = ...; // initializes the object + * + * tommy_hashlin_insert(&hashlin, &obj->node, obj, tommy_inthash_u32(obj->value)); // inserts the object + * \endcode + * + * To find and element in the hashtable you have to call tommy_hashtable_search() + * providing a comparison function, its argument, and the hash of the key to search. + * + * \code + * int compare(const void* arg, const void* obj) + * { + * return *(const int*)arg != ((const struct object*)obj)->value; + * } + * + * int value_to_find = 1; + * struct object* obj = tommy_hashlin_search(&hashlin, compare, &value_to_find, tommy_inthash_u32(value_to_find)); + * if (!obj) { + * // not found + * } else { + * // found + * } + * \endcode + * + * To iterate over all the elements in the hashtable with the same key, you have to + * use tommy_hashlin_bucket() and follow the tommy_node::next pointer until NULL. + * You have also to check explicitely for the key, as the bucket may contains + * different keys. + * + * \code + * int value_to_find = 1; + * tommy_node* i = tommy_hashlin_bucket(&hashlin, tommy_inthash_u32(value_to_find)); + * while (i) { + * struct object* obj = i->data; // gets the object pointer + * + * if (obj->value == value_to_find) { + * printf("%d\n", obj->value); // process the object + * } + * + * i = i->next; // goes to the next element + * } + * \endcode + * + * To remove an element from the hashtable you have to call tommy_hashlin_remove() + * providing a comparison function, its argument, and the hash of the key to search + * and remove. + * + * \code + * struct object* obj = tommy_hashlin_remove(&hashlin, compare, &value_to_remove, tommy_inthash_u32(value_to_remove)); + * if (obj) { + * free(obj); // frees the object allocated memory + * } + * \endcode + * + * To destroy the hashtable you have to remove all the elements, and deinitialize + * the hashtable calling tommy_hashlin_done(). + * + * \code + * tommy_hashlin_done(&hashlin); + * \endcode + * + * If you need to iterate over all the elements in the hashtable, you can use + * tommy_hashlin_foreach() or tommy_hashlin_foreach_arg(). + * If you need a more precise control with a real iteration, you have to insert + * all the elements also in a ::tommy_list, and use the list to iterate. + * See the \ref multiindex example for more detail. + */ + +#ifndef __TOMMYHASHLIN_H +#define __TOMMYHASHLIN_H + +#include "tommyhash.h" + +/******************************************************************************/ +/* hashlin */ + +/** \internal + * Initial and minimal size of the hashtable expressed as a power of 2. + * The initial size is 2^TOMMY_HASHLIN_BIT. + */ +#define TOMMY_HASHLIN_BIT 6 + +/** + * Hashtable node. + * This is the node that you have to include inside your objects. + */ +typedef tommy_node tommy_hashlin_node; + +/** \internal + * Max number of elements as a power of 2. + */ +#define TOMMY_HASHLIN_BIT_MAX 32 + +/** + * Hashtable container type. + * \note Don't use internal fields directly, but access the container only using functions. + */ +typedef struct tommy_hashlin_struct { + tommy_hashlin_node** bucket[TOMMY_HASHLIN_BIT_MAX]; /**< Dynamic array of hash buckets. One list for each hash modulus. */ + tommy_uint_t bucket_bit; /**< Bits used in the bit mask. */ + tommy_count_t bucket_max; /**< Number of buckets. */ + tommy_count_t bucket_mask; /**< Bit mask to access the buckets. */ + tommy_count_t low_max; /**< Low order max value. */ + tommy_count_t low_mask; /**< Low order mask value. */ + tommy_count_t split; /**< Split position. */ + tommy_count_t count; /**< Number of elements. */ + tommy_uint_t state; /**< Reallocation state. */ +} tommy_hashlin; + +/** + * Initializes the hashtable. + */ +void tommy_hashlin_init(tommy_hashlin* hashlin); + +/** + * Deinitializes the hashtable. + * + * You can call this function with elements still contained, + * but such elements are not going to be freed by this call. + */ +void tommy_hashlin_done(tommy_hashlin* hashlin); + +/** + * Inserts an element in the hashtable. + */ +void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void* data, tommy_hash_t hash); + +/** + * Searches and removes an element from the hashtable. + * You have to provide a compare function and the hash of the element you want to remove. + * If the element is not found, 0 is returned. + * If more equal elements are present, the first one is removed. + * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one. + * The function should return 0 for equal elements, anything other for different elements. + * \param cmp_arg Compare argument passed as first argument of the compare function. + * \param hash Hash of the element to find and remove. + * \return The removed element, or 0 if not found. + */ +void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash); + +/** \internal + * Returns the bucket at the specified position. + */ +tommy_inline tommy_hashlin_node** tommy_hashlin_pos(tommy_hashlin* hashlin, tommy_hash_t pos) +{ + tommy_uint_t bsr; + + /* get the highest bit set, in case of all 0, return 0 */ + bsr = tommy_ilog2_u32(pos | 1); + + return &hashlin->bucket[bsr][pos]; +} + +/** \internal + * Returns a pointer to the bucket of the specified hash. + */ +tommy_inline tommy_hashlin_node** tommy_hashlin_bucket_ref(tommy_hashlin* hashlin, tommy_hash_t hash) +{ + tommy_count_t pos; + tommy_count_t high_pos; + + pos = hash & hashlin->low_mask; + high_pos = hash & hashlin->bucket_mask; + + /* if this position is already allocated in the high half */ + if (pos < hashlin->split) { + /* The following assigment is expected to be implemented */ + /* with a conditional move instruction */ + /* that results in a little better and constant performance */ + /* regardless of the split position. */ + /* This affects mostly the worst case, when the split value */ + /* is near at its half, resulting in a totally unpredictable */ + /* condition by the CPU. */ + /* In such case the use of the conditional move is generally faster. */ + + /* use also the high bit */ + pos = high_pos; + } + + return tommy_hashlin_pos(hashlin, pos); +} + +/** + * Gets the bucket of the specified hash. + * The bucket is guaranteed to contain ALL the elements with the specified hash, + * but it can contain also others. + * You can access elements in the bucket following the ::next pointer until 0. + * \param hash Hash of the element to find. + * \return The head of the bucket, or 0 if empty. + */ +tommy_inline tommy_hashlin_node* tommy_hashlin_bucket(tommy_hashlin* hashlin, tommy_hash_t hash) +{ + return *tommy_hashlin_bucket_ref(hashlin, hash); +} + +/** + * Searches an element in the hashtable. + * You have to provide a compare function and the hash of the element you want to find. + * If more equal elements are present, the first one is returned. + * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one. + * The function should return 0 for equal elements, anything other for different elements. + * \param cmp_arg Compare argument passed as first argument of the compare function. + * \param hash Hash of the element to find. + * \return The first element found, or 0 if none. + */ +tommy_inline void* tommy_hashlin_search(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash) +{ + tommy_hashlin_node* i = tommy_hashlin_bucket(hashlin, hash); + + while (i) { + /* we first check if the hash matches, as in the same bucket we may have multiples hash values */ + if (i->key == hash && cmp(cmp_arg, i->data) == 0) + return i->data; + i = i->next; + } + return 0; +} + +/** + * Removes an element from the hashtable. + * You must already have the address of the element to remove. + * \return The tommy_node::data field of the node removed. + */ +void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* node); + +/** + * Calls the specified function for each element in the hashtable. + * + * You cannot add or remove elements from the inside of the callback, + * but can use it to deallocate them. + * + * \code + * tommy_hashlin hashlin; + * + * // initializes the hashtable + * tommy_hashlin_init(&hashlin); + * + * ... + * + * // creates an object + * struct object* obj = malloc(sizeof(struct object)); + * + * ... + * + * // insert it in the hashtable + * tommy_hashlin_insert(&hashlin, &obj->node, obj, tommy_inthash_u32(obj->value)); + * + * ... + * + * // deallocates all the objects iterating the hashtable + * tommy_hashlin_foreach(&hashlin, free); + * + * // deallocates the hashtable + * tommy_hashlin_done(&hashlin); + * \endcode + */ +void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func); + +/** + * Calls the specified function with an argument for each element in the hashtable. + */ +void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* func, void* arg); + +/** + * Gets the number of elements. + */ +tommy_inline tommy_count_t tommy_hashlin_count(tommy_hashlin* hashlin) +{ + return hashlin->count; +} + +/** + * Gets the size of allocated memory. + * It includes the size of the ::tommy_hashlin_node of the stored elements. + */ +tommy_size_t tommy_hashlin_memory_usage(tommy_hashlin* hashlin); + +#endif + diff --git a/third-party/tommyds/tommyhashtbl.c b/third-party/tommyds/tommyhashtbl.c new file mode 100644 index 0000000..007b8ec --- /dev/null +++ b/third-party/tommyds/tommyhashtbl.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommyhashtbl.h" +#include "tommylist.h" + +#include <string.h> /* for memset */ + +/******************************************************************************/ +/* hashtable */ + +void tommy_hashtable_init(tommy_hashtable* hashtable, tommy_count_t bucket_max) +{ + if (bucket_max < 16) + bucket_max = 16; + else + bucket_max = tommy_roundup_pow2_u32(bucket_max); + + hashtable->bucket_max = bucket_max; + hashtable->bucket_mask = hashtable->bucket_max - 1; + + /* initialize the vector using malloc()+memset() instead of calloc() */ + /* to ensure that all the memory in really allocated immediately */ + /* by the OS, and not deferred at later time. */ + /* this improves performance, because we start with a fully initialized hashtable. */ + hashtable->bucket = tommy_cast(tommy_hashtable_node**, tommy_malloc(hashtable->bucket_max * sizeof(tommy_hashtable_node*))); + memset(hashtable->bucket, 0, hashtable->bucket_max * sizeof(tommy_hashtable_node*)); + + hashtable->count = 0; +} + +void tommy_hashtable_done(tommy_hashtable* hashtable) +{ + tommy_free(hashtable->bucket); +} + +void tommy_hashtable_insert(tommy_hashtable* hashtable, tommy_hashtable_node* node, void* data, tommy_hash_t hash) +{ + tommy_count_t pos = hash & hashtable->bucket_mask; + + tommy_list_insert_tail(&hashtable->bucket[pos], node, data); + + node->key = hash; + + ++hashtable->count; +} + +void* tommy_hashtable_remove_existing(tommy_hashtable* hashtable, tommy_hashtable_node* node) +{ + tommy_count_t pos = node->key & hashtable->bucket_mask; + + tommy_list_remove_existing(&hashtable->bucket[pos], node); + + --hashtable->count; + + return node->data; +} + +void* tommy_hashtable_remove(tommy_hashtable* hashtable, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash) +{ + tommy_count_t pos = hash & hashtable->bucket_mask; + tommy_hashtable_node* node = hashtable->bucket[pos]; + + while (node) { + /* we first check if the hash matches, as in the same bucket we may have multiples hash values */ + if (node->key == hash && cmp(cmp_arg, node->data) == 0) { + tommy_list_remove_existing(&hashtable->bucket[pos], node); + + --hashtable->count; + + return node->data; + } + node = node->next; + } + + return 0; +} + +void tommy_hashtable_foreach(tommy_hashtable* hashtable, tommy_foreach_func* func) +{ + tommy_count_t bucket_max = hashtable->bucket_max; + tommy_hashtable_node** bucket = hashtable->bucket; + tommy_count_t pos; + + for (pos = 0; pos < bucket_max; ++pos) { + tommy_hashtable_node* node = bucket[pos]; + + while (node) { + void* data = node->data; + node = node->next; + func(data); + } + } +} + +void tommy_hashtable_foreach_arg(tommy_hashtable* hashtable, tommy_foreach_arg_func* func, void* arg) +{ + tommy_count_t bucket_max = hashtable->bucket_max; + tommy_hashtable_node** bucket = hashtable->bucket; + tommy_count_t pos; + + for (pos = 0; pos < bucket_max; ++pos) { + tommy_hashtable_node* node = bucket[pos]; + + while (node) { + void* data = node->data; + node = node->next; + func(arg, data); + } + } +} + +tommy_size_t tommy_hashtable_memory_usage(tommy_hashtable* hashtable) +{ + return hashtable->bucket_max * (tommy_size_t)sizeof(hashtable->bucket[0]) + + tommy_hashtable_count(hashtable) * (tommy_size_t)sizeof(tommy_hashtable_node); +} + diff --git a/third-party/tommyds/tommyhashtbl.h b/third-party/tommyds/tommyhashtbl.h new file mode 100644 index 0000000..0cb6332 --- /dev/null +++ b/third-party/tommyds/tommyhashtbl.h @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Fixed size chained hashtable. + * + * This hashtable is a standard implementation of a chained hashtable with a fixed size. + * + * Note that performances starts to degenerate after reaching a load factor greater than 0.75. + * The ::tommy_hashdyn and ::tommy_hashlin hashtables fix this problem growing dynamically. + * + * To initialize the hashtable you have to call tommy_hashtable_init() specifing + * the fixed bucket size. + * + * \code + * tommy_hashslin hashtable; + * + * tommy_hashtable_init(&hashtable, 1024); + * \endcode + * + * To insert elements in the hashtable you have to call tommy_hashtable_insert() for + * each element. + * In the insertion call you have to specify the address of the node, the + * address of the object, and the hash value of the key to use. + * The address of the object is used to initialize the tommy_node::data field + * of the node, and the hash to initialize the tommy_node::key field. + * + * \code + * struct object { + * int value; + * // other fields + * tommy_node node; + * }; + * + * struct object* obj = malloc(sizeof(struct object)); // creates the object + * + * obj->value = ...; // initializes the object + * + * tommy_hashtable_insert(&hashtable, &obj->node, obj, tommy_inthash_u32(obj->value)); // inserts the object + * \endcode + * + * To find and element in the hashtable you have to call tommy_hashtable_search() + * providing a comparison function, its argument, and the hash of the key to search. + * + * \code + * int compare(const void* arg, const void* obj) + * { + * return *(const int*)arg != ((const struct object*)obj)->value; + * } + * + * int value_to_find = 1; + * struct object* obj = tommy_hashtable_search(&hashtable, compare, &value_to_find, tommy_inthash_u32(value_to_find)); + * if (!obj) { + * // not found + * } else { + * // found + * } + * \endcode + * + * To iterate over all the elements in the hashtable with the same key, you have to + * use tommy_hashtable_bucket() and follow the tommy_node::next pointer until NULL. + * You have also to check explicitely for the key, as the bucket may contains + * different keys. + * + * \code + * tommy_node* i = tommy_hashtable_bucket(&hashtable, tommy_inthash_u32(value_to_find)); + * while (i) { + * struct object* obj = i->data; // gets the object pointer + * + * if (obj->value == value_to_find) { + * printf("%d\n", obj->value); // process the object + * } + * + * i = i->next; // goes to the next element + * } + * \endcode + * + * To remove an element from the hashtable you have to call tommy_hashtable_remove() + * providing a comparison function, its argument, and the hash of the key to search + * and remove. + * + * \code + * struct object* obj = tommy_hashtable_remove(&hashtable, compare, &value_to_remove, tommy_inthash_u32(value_to_remove)); + * if (obj) { + * free(obj); // frees the object allocated memory + * } + * \endcode + * + * To destroy the hashtable you have to remove all the elements, and deinitialize + * the hashtable calling tommy_hashtable_done(). + * + * \code + * tommy_hashtable_done(&hashtable); + * \endcode + * + * If you need to iterate over all the elements in the hashtable, you can use + * tommy_hashtable_foreach() or tommy_hashtable_foreach_arg(). + * If you need a more precise control with a real iteration, you have to insert + * all the elements also in a ::tommy_list, and use the list to iterate. + * See the \ref multiindex example for more detail. + */ + +#ifndef __TOMMYHASHTBL_H +#define __TOMMYHASHTBL_H + +#include "tommyhash.h" + +/******************************************************************************/ +/* hashtable */ + +/** + * Hashtable node. + * This is the node that you have to include inside your objects. + */ +typedef tommy_node tommy_hashtable_node; + +/** + * Hashtable container type. + * \note Don't use internal fields directly, but access the container only using functions. + */ +typedef struct tommy_hashtable_struct { + tommy_hashtable_node** bucket; /**< Hash buckets. One list for each hash modulus. */ + tommy_count_t bucket_max; /**< Number of buckets. */ + tommy_count_t bucket_mask; /**< Bit mask to access the buckets. */ + tommy_count_t count; /**< Number of elements. */ +} tommy_hashtable; + +/** + * Initializes the hashtable. + * \param buckets Minimum number of buckets to allocate. The effective number used is the next power of 2. + */ +void tommy_hashtable_init(tommy_hashtable* hashtable, tommy_count_t bucket_max); + +/** + * Deinitializes the hashtable. + * + * You can call this function with elements still contained, + * but such elements are not going to be freed by this call. + */ +void tommy_hashtable_done(tommy_hashtable* hashtable); + +/** + * Inserts an element in the hashtable. + */ +void tommy_hashtable_insert(tommy_hashtable* hashtable, tommy_hashtable_node* node, void* data, tommy_hash_t hash); + +/** + * Searches and removes an element from the hashtable. + * You have to provide a compare function and the hash of the element you want to remove. + * If the element is not found, 0 is returned. + * If more equal elements are present, the first one is removed. + * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one. + * The function should return 0 for equal elements, anything other for different elements. + * \param cmp_arg Compare argument passed as first argument of the compare function. + * \param hash Hash of the element to find and remove. + * \return The removed element, or 0 if not found. + */ +void* tommy_hashtable_remove(tommy_hashtable* hashtable, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash); + +/** + * Gets the bucket of the specified hash. + * The bucket is guaranteed to contain ALL the elements with the specified hash, + * but it can contain also others. + * You can access elements in the bucket following the ::next pointer until 0. + * \param hash Hash of the element to find. + * \return The head of the bucket, or 0 if empty. + */ +tommy_inline tommy_hashtable_node* tommy_hashtable_bucket(tommy_hashtable* hashtable, tommy_hash_t hash) +{ + return hashtable->bucket[hash & hashtable->bucket_mask]; +} + +/** + * Searches an element in the hashtable. + * You have to provide a compare function and the hash of the element you want to find. + * If more equal elements are present, the first one is returned. + * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one. + * The function should return 0 for equal elements, anything other for different elements. + * \param cmp_arg Compare argument passed as first argument of the compare function. + * \param hash Hash of the element to find. + * \return The first element found, or 0 if none. + */ +tommy_inline void* tommy_hashtable_search(tommy_hashtable* hashtable, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash) +{ + tommy_hashtable_node* i = tommy_hashtable_bucket(hashtable, hash); + + while (i) { + /* we first check if the hash matches, as in the same bucket we may have multiples hash values */ + if (i->key == hash && cmp(cmp_arg, i->data) == 0) + return i->data; + i = i->next; + } + return 0; +} + +/** + * Removes an element from the hashtable. + * You must already have the address of the element to remove. + * \return The tommy_node::data field of the node removed. + */ +void* tommy_hashtable_remove_existing(tommy_hashtable* hashtable, tommy_hashtable_node* node); + +/** + * Calls the specified function for each element in the hashtable. + * + * You cannot add or remove elements from the inside of the callback, + * but can use it to deallocate them. + * + * \code + * tommy_hashtable hashtable; + * + * // initializes the hashtable + * tommy_hashtable_init(&hashtable, ...); + * + * ... + * + * // creates an object + * struct object* obj = malloc(sizeof(struct object)); + * + * ... + * + * // insert it in the hashtable + * tommy_hashdyn_insert(&hashtable, &obj->node, obj, tommy_inthash_u32(obj->value)); + * + * ... + * + * // deallocates all the objects iterating the hashtable + * tommy_hashtable_foreach(&hashtable, free); + * + * // deallocates the hashtable + * tommy_hashdyn_done(&hashtable); + * \endcode + */ +void tommy_hashtable_foreach(tommy_hashtable* hashtable, tommy_foreach_func* func); + +/** + * Calls the specified function with an argument for each element in the hashtable. + */ +void tommy_hashtable_foreach_arg(tommy_hashtable* hashtable, tommy_foreach_arg_func* func, void* arg); + +/** + * Gets the number of elements. + */ +tommy_inline tommy_count_t tommy_hashtable_count(tommy_hashtable* hashtable) +{ + return hashtable->count; +} + +/** + * Gets the size of allocated memory. + * It includes the size of the ::tommy_hashtable_node of the stored elements. + */ +tommy_size_t tommy_hashtable_memory_usage(tommy_hashtable* hashtable); + +#endif + diff --git a/third-party/tommyds/tommylist.c b/third-party/tommyds/tommylist.c new file mode 100644 index 0000000..1fa1f24 --- /dev/null +++ b/third-party/tommyds/tommylist.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommylist.h" +#include "tommychain.h" + +/** \internal + * Setup a list. + */ +tommy_inline void tommy_list_set(tommy_list* list, tommy_node* head, tommy_node* tail) +{ + head->prev = tail; + tail->next = 0; + *list = head; +} + +void tommy_list_sort(tommy_list* list, tommy_compare_func* cmp) +{ + tommy_chain chain; + tommy_node* head; + + if (tommy_list_empty(list)) + return; + + head = tommy_list_head(list); + + /* create a chain from the list */ + chain.head = head; + chain.tail = head->prev; + + tommy_chain_mergesort(&chain, cmp); + + /* restore the list */ + tommy_list_set(list, chain.head, chain.tail); +} + diff --git a/third-party/tommyds/tommylist.h b/third-party/tommyds/tommylist.h new file mode 100644 index 0000000..368606a --- /dev/null +++ b/third-party/tommyds/tommylist.h @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Double linked list for collisions into hashtables. + * + * This list is a double linked list mainly targetted for handling collisions + * into an hashtables, but useable also as a generic list. + * + * The main feature of this list is to require only one pointer to represent the + * list, compared to a classic implementation requiring a head an a tail pointers. + * This reduces the memory usage in hashtables. + * + * Another feature is to support the insertion at the end of the list. This allow to store + * collisions in a stable order. Where for stable order we mean that equal elements keep + * their insertion order. + * + * To initialize the list, you have to call tommy_list_init(), or to simply assign + * to it NULL, as an empty list is represented by the NULL value. + * + * \code + * tommy_list list; + * + * tommy_list_init(&list); // initializes the list + * \endcode + * + * To insert elements in the list you have to call tommy_list_insert_tail() + * or tommy_list_insert_head() for each element. + * In the insertion call you have to specify the address of the node and the + * address of the object. + * The address of the object is used to initialize the tommy_node::data field + * of the node. + * + * \code + * struct object { + * int value; + * // other fields + * tommy_node node; + * }; + * + * struct object* obj = malloc(sizeof(struct object)); // creates the object + * + * obj->value = ...; // initializes the object + * + * tommy_list_insert_tail(&list, &obj->node, obj); // inserts the object + * \endcode + * + * To iterate over all the elements in the list you have to call + * tommy_list_head() to get the head of the list and follow the + * tommy_node::next pointer until NULL. + * + * \code + * tommy_node* i = tommy_list_head(&list); + * while (i) { + * struct object* obj = i->data; // gets the object pointer + * + * printf("%d\n", obj->value); // process the object + * + * i = i->next; // go to the next element + * } + * \endcode + * + * To destroy the list you have to remove all the elements, + * as the list is completely inplace and it doesn't allocate memory. + * This can be done with the tommy_list_foreach() function. + * + * \code + * // deallocates all the objects iterating the list + * tommy_list_foreach(&list, free); + * \endcode + */ + +#ifndef __TOMMYLIST_H +#define __TOMMYLIST_H + +#include "tommytypes.h" + +/******************************************************************************/ +/* list */ + +/** + * Double linked list type. + */ +typedef tommy_node* tommy_list; + +/** + * Initializes the list. + * The list is completely inplace, so it doesn't need to be deinitialized. + */ +tommy_inline void tommy_list_init(tommy_list* list) +{ + *list = 0; +} + +/** + * Gets the head of the list. + * \return The head node. For empty lists 0 is returned. + */ +tommy_inline tommy_node* tommy_list_head(tommy_list* list) +{ + return *list; +} + +/** + * Gets the tail of the list. + * \return The tail node. For empty lists 0 is returned. + */ +tommy_inline tommy_node* tommy_list_tail(tommy_list* list) +{ + tommy_node* head = tommy_list_head(list); + + if (!head) + return 0; + + return head->prev; +} + +/** \internal + * Creates a new list with a single element. + * \param list The list to initialize. + * \param node The node to insert. + */ +tommy_inline void tommy_list_insert_first(tommy_list* list, tommy_node* node) +{ + /* one element "circular" prev list */ + node->prev = node; + + /* one element "0 terminated" next list */ + node->next = 0; + + *list = node; +} + +/** \internal + * Inserts an element at the head of a not empty list. + * The element is inserted at the head of the list. The list cannot be empty. + * \param list The list. The list cannot be empty. + * \param node The node to insert. + */ +tommy_inline void tommy_list_insert_head_not_empty(tommy_list* list, tommy_node* node) +{ + tommy_node* head = tommy_list_head(list); + + /* insert in the "circular" prev list */ + node->prev = head->prev; + head->prev = node; + + /* insert in the "0 terminated" next list */ + node->next = head; + + *list = node; +} + +/** \internal + * Inserts an element at the tail of a not empty list. + * The element is inserted at the tail of the list. The list cannot be empty. + * \param head The node at the list head. It cannot be 0. + * \param node The node to insert. + */ +tommy_inline void tommy_list_insert_tail_not_empty(tommy_node* head, tommy_node* node) +{ + /* insert in the "circular" prev list */ + node->prev = head->prev; + head->prev = node; + + /* insert in the "0 terminated" next list */ + node->next = 0; + node->prev->next = node; +} + +/** + * Inserts an element at the head of a list. + * \param node The node to insert. + * \param data The object containing the node. It's used to set the tommy_node::data field of the node. + */ +tommy_inline void tommy_list_insert_head(tommy_list* list, tommy_node* node, void* data) +{ + tommy_node* head = tommy_list_head(list); + + if (head) + tommy_list_insert_head_not_empty(list, node); + else + tommy_list_insert_first(list, node); + + node->data = data; +} + +/** + * Inserts an element at the tail of a list. + * \param node The node to insert. + * \param data The object containing the node. It's used to set the tommy_node::data field of the node. + */ +tommy_inline void tommy_list_insert_tail(tommy_list* list, tommy_node* node, void* data) +{ + tommy_node* head = tommy_list_head(list); + + if (head) + tommy_list_insert_tail_not_empty(head, node); + else + tommy_list_insert_first(list, node); + + node->data = data; +} + +/** + * Removes an element from the list. + * You must already have the address of the element to remove. + * \note The node content is left unchanged, including the tommy_node::next + * and tommy_node::prev fields that still contain pointers at the list. + * \param node The node to remove. The node must be in the list. + * \return The tommy_node::data field of the node removed. + */ +tommy_inline void* tommy_list_remove_existing(tommy_list* list, tommy_node* node) +{ + tommy_node* head = tommy_list_head(list); + + /* remove from the "circular" prev list */ + if (node->next) + node->next->prev = node->prev; + else + head->prev = node->prev; /* the last */ + + /* remove from the "0 terminated" next list */ + if (head == node) + *list = node->next; /* the new head, in case 0 */ + else + node->prev->next = node->next; + + return node->data; +} + +/** + * Concats two lists. + * The second list is concatenated at the first list. + * \param first The first list. + * \param second The second list. After this call the list content is undefined, + * and you should not use it anymore. + */ +tommy_inline void tommy_list_concat(tommy_list* first, tommy_list* second) +{ + tommy_node* first_head; + tommy_node* first_tail; + tommy_node* second_head; + + /* if the second is empty, nothing to do */ + second_head = tommy_list_head(second); + if (second_head == 0) + return; + + /* if the first is empty, copy the second */ + first_head = tommy_list_head(first); + if (first_head == 0) { + *first = *second; + return; + } + + /* tail of the first list */ + first_tail = first_head->prev; + + /* set the "circular" prev list */ + first_head->prev = second_head->prev; + second_head->prev = first_tail; + + /* set the "0 terminated" next list */ + first_tail->next = second_head; +} + +/** + * Sorts a list. + * It's a stable merge sort with O(N*log(N)) worst complexity. + * It's faster on degenerated cases like partially ordered lists. + * \param cmp Compare function called with two elements. + * The function should return <0 if the first element is less than the second, ==0 if equal, and >0 if greather. + */ +void tommy_list_sort(tommy_list* list, tommy_compare_func* cmp); + +/** + * Checks if empty. + * \return If the list is empty. + */ +tommy_inline tommy_bool_t tommy_list_empty(tommy_list* list) +{ + return tommy_list_head(list) == 0; +} + +/** + * Gets the number of elements. + * \note This operation is O(n). + */ +tommy_inline tommy_count_t tommy_list_count(tommy_list* list) +{ + tommy_count_t count = 0; + tommy_node* i = tommy_list_head(list); + + while (i) { + ++count; + i = i->next; + } + + return count; +} + +/** + * Calls the specified function for each element in the list. + * + * You cannot add or remove elements from the inside of the callback, + * but can use it to deallocate them. + * + * \code + * tommy_list list; + * + * // initializes the list + * tommy_list_init(&list); + * + * ... + * + * // creates an object + * struct object* obj = malloc(sizeof(struct object)); + * + * ... + * + * // insert it in the list + * tommy_list_insert_tail(&list, &obj->node, obj); + * + * ... + * + * // deallocates all the objects iterating the list + * tommy_list_foreach(&list, free); + * \endcode + */ +tommy_inline void tommy_list_foreach(tommy_list* list, tommy_foreach_func* func) +{ + tommy_node* node = tommy_list_head(list); + + while (node) { + void* data = node->data; + node = node->next; + func(data); + } +} + +/** + * Calls the specified function with an argument for each element in the list. + */ +tommy_inline void tommy_list_foreach_arg(tommy_list* list, tommy_foreach_arg_func* func, void* arg) +{ + tommy_node* node = tommy_list_head(list); + + while (node) { + void* data = node->data; + node = node->next; + func(arg, data); + } +} + +#endif + diff --git a/third-party/tommyds/tommytree.c b/third-party/tommyds/tommytree.c new file mode 100644 index 0000000..6b2f1a9 --- /dev/null +++ b/third-party/tommyds/tommytree.c @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2015, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommytree.h" + +#include <assert.h> /* for assert */ + +/******************************************************************************/ +/* tree */ + +void tommy_tree_init(tommy_tree* tree, tommy_compare_func* cmp) +{ + tree->root = 0; + tree->count = 0; + tree->cmp = cmp; +} + +static int tommy_tree_delta(tommy_tree_node* root) +{ + int left_height = root->prev ? root->prev->key : 0; + int right_height = root->next ? root->next->key : 0; + + return left_height - right_height; +} + +/* AVL tree operations */ +static tommy_tree_node* tommy_tree_balance(tommy_tree_node*); + +static tommy_tree_node* tommy_tree_rotate_left(tommy_tree_node* root) +{ + tommy_tree_node* next = root->next; + + root->next = next->prev; + + next->prev = tommy_tree_balance(root); + + return tommy_tree_balance(next); +} + +static tommy_tree_node* tommy_tree_rotate_right(tommy_tree_node* root) +{ + tommy_tree_node* prev = root->prev; + + root->prev = prev->next; + + prev->next = tommy_tree_balance(root); + + return tommy_tree_balance(prev); +} + +static tommy_tree_node* tommy_tree_move_right(tommy_tree_node* root, tommy_tree_node* node) +{ + if (!root) + return node; + + root->next = tommy_tree_move_right(root->next, node); + + return tommy_tree_balance(root); +} + +static tommy_tree_node* tommy_tree_balance(tommy_tree_node* root) +{ + int delta = tommy_tree_delta(root); + + if (delta < -1) { + if (tommy_tree_delta(root->next) > 0) + root->next = tommy_tree_rotate_right(root->next); + return tommy_tree_rotate_left(root); + } + + if (delta > 1) { + if (tommy_tree_delta(root->prev) < 0) + root->prev = tommy_tree_rotate_left(root->prev); + return tommy_tree_rotate_right(root); + } + + /* recompute key */ + root->key = 0; + + if (root->prev && root->prev->key > root->key) + root->key = root->prev->key; + + if (root->next && root->next->key > root->key) + root->key = root->next->key; + + /* count itself */ + root->key += 1; + + return root; +} + +static tommy_tree_node* tommy_tree_insert_node(tommy_compare_func* cmp, tommy_tree_node* root, tommy_tree_node** let) +{ + int c; + + if (!root) + return *let; + + c = cmp((*let)->data, root->data); + + if (c < 0) { + root->prev = tommy_tree_insert_node(cmp, root->prev, let); + return tommy_tree_balance(root); + } + + if (c > 0) { + root->next = tommy_tree_insert_node(cmp, root->next, let); + return tommy_tree_balance(root); + } + + /* already present, set the return pointer */ + *let = root; + + return root; +} + +void* tommy_tree_insert(tommy_tree* tree, tommy_tree_node* node, void* data) +{ + tommy_tree_node* insert = node; + + insert->data = data; + insert->prev = 0; + insert->next = 0; + insert->key = 0; + + tree->root = tommy_tree_insert_node(tree->cmp, tree->root, &insert); + + if (insert == node) + ++tree->count; + + return insert->data; +} + +static tommy_tree_node* tommy_tree_remove_node(tommy_compare_func* cmp, tommy_tree_node* root, void* data, tommy_tree_node** let) +{ + int c; + + if (!root) + return 0; + + c = cmp(data, root->data); + + if (c < 0) { + root->prev = tommy_tree_remove_node(cmp, root->prev, data, let); + return tommy_tree_balance(root); + } + + if (c > 0) { + root->next = tommy_tree_remove_node(cmp, root->next, data, let); + return tommy_tree_balance(root); + } + + /* found */ + *let = root; + + return tommy_tree_move_right(root->prev, root->next); +} + +void* tommy_tree_remove(tommy_tree* tree, void* data) +{ + tommy_tree_node* node = 0; + + tree->root = tommy_tree_remove_node(tree->cmp, tree->root, data, &node); + + if (!node) + return 0; + + --tree->count; + + return node->data; +} + +static tommy_tree_node* tommy_tree_search_node(tommy_compare_func* cmp, tommy_tree_node* root, void* data) +{ + int c; + + if (!root) + return 0; + + c = cmp(data, root->data); + + if (c < 0) + return tommy_tree_search_node(cmp, root->prev, data); + + if (c > 0) + return tommy_tree_search_node(cmp, root->next, data); + + return root; +} + +void* tommy_tree_search(tommy_tree* tree, void* data) +{ + tommy_tree_node* node = tommy_tree_search_node(tree->cmp, tree->root, data); + + if (!node) + return 0; + + return node->data; +} + +void* tommy_tree_search_compare(tommy_tree* tree, tommy_compare_func* cmp, void* cmp_arg) +{ + tommy_tree_node* node = tommy_tree_search_node(cmp, tree->root, cmp_arg); + + if (!node) + return 0; + + return node->data; +} + +void* tommy_tree_remove_existing(tommy_tree* tree, tommy_tree_node* node) +{ + void* data = tommy_tree_remove(tree, node->data); + + assert(data != 0); + + return data; +} + +static void tommy_tree_foreach_node(tommy_tree_node* root, tommy_foreach_func* func) +{ + tommy_tree_node* next; + + if (!root) + return; + + tommy_tree_foreach_node(root->prev, func); + + /* make a copy in case func is free() */ + next = root->next; + + func(root->data); + + tommy_tree_foreach_node(next, func); +} + +void tommy_tree_foreach(tommy_tree* tree, tommy_foreach_func* func) +{ + tommy_tree_foreach_node(tree->root, func); +} + +static void tommy_tree_foreach_arg_node(tommy_tree_node* root, tommy_foreach_arg_func* func, void* arg) +{ + tommy_tree_node* next; + + if (!root) + return; + + tommy_tree_foreach_arg_node(root->prev, func, arg); + + /* make a copy in case func is free() */ + next = root->next; + + func(arg, root->data); + + tommy_tree_foreach_arg_node(next, func, arg); +} + +void tommy_tree_foreach_arg(tommy_tree* tree, tommy_foreach_arg_func* func, void* arg) +{ + tommy_tree_foreach_arg_node(tree->root, func, arg); +} + +tommy_size_t tommy_tree_memory_usage(tommy_tree* tree) +{ + return tommy_tree_count(tree) * sizeof(tommy_tree_node); +} + diff --git a/third-party/tommyds/tommytree.h b/third-party/tommyds/tommytree.h new file mode 100644 index 0000000..55e4006 --- /dev/null +++ b/third-party/tommyds/tommytree.h @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2015, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * AVL tree. + * + * This tree is a standard AVL tree implementation that stores elements in the + * order defined by the comparison function. + * + * As difference than other tommy containers, duplicate elements cannot be inserted. + * + * To initialize a tree you have to call tommy_tree_init() specifing a comparison + * function that will define the order in the tree. + * + * \code + * tommy_tree tree; + * + * tommy_tree_init(&tree, cmp); + * \endcode + * + * To insert elements in the tree you have to call tommy_tree_insert() for + * each element. + * In the insertion call you have to specify the address of the node and the + * address of the object. + * The address of the object is used to initialize the tommy_node::data field + * of the node. + * + * \code + * struct object { + * int value; + * // other fields + * tommy_tree_node node; + * }; + * + * struct object* obj = malloc(sizeof(struct object)); // creates the object + * + * obj->value = ...; // initializes the object + * + * tommy_tree_insert(&tree, &obj->node, obj); // inserts the object + * \endcode + * + * To find and element in the tree you have to call tommy_tree_search() providing + * the key to search. + * + * \code + * struct object value_to_find = { 1 }; + * struct object* obj = tommy_tree_search(&tree, &value_to_find); + * if (!obj) { + * // not found + * } else { + * // found + * } + * \endcode + * + * To remove an element from the tree you have to call tommy_tree_remove() + * providing the key to search and remove. + * + * \code + * struct object value_to_remove = { 1 }; + * struct object* obj = tommy_tree_remove(&tree, &value_to_remove); + * if (obj) { + * free(obj); // frees the object allocated memory + * } + * \endcode + * + * To destroy the tree you have to remove or destroy all the contained elements. + * The tree itself doesn't have or need a deallocation function. + * + * If you need to iterate over all the elements in the tree, you can use + * tommy_tree_foreach() or tommy_tree_foreach_arg(). + * If you need a more precise control with a real iteration, you have to insert + * all the elements also in a ::tommy_list, and use the list to iterate. + * See the \ref multiindex example for more detail. + */ + +#ifndef __TOMMYTREE_H +#define __TOMMYTREE_H + +#include "tommytypes.h" + +/******************************************************************************/ +/* tree */ + +/** + * Tree node. + * This is the node that you have to include inside your objects. + */ +typedef tommy_node tommy_tree_node; + +/** + * Tree container type. + * \note Don't use internal fields directly, but access the container only using functions. + */ +typedef struct tommy_tree_struct { + tommy_tree_node* root; /**< Root node. */ + tommy_count_t count; /**< Number of elements. */ + tommy_compare_func* cmp; /**< Comparison function. */ +} tommy_tree; + +/** + * Initializes the tree. + * \param cmp The comparison function that defines the orderin the tree. + */ +void tommy_tree_init(tommy_tree* tree, tommy_compare_func* cmp); + +/** + * Inserts an element in the tree. + * If the element is already present, it's not inserted again. + * Check the return value to identify if the element was already present or not. + * You have to provide the pointer of the node embedded into the object and + * the pointer to the object. + * \param node Pointer to the node embedded into the object to insert. + * \param data Pointer to the object to insert. + * \return The element in the tree. Either the already existing one, or the one just inserted. + */ +void* tommy_tree_insert(tommy_tree* tree, tommy_tree_node* node, void* data); + +/** + * Searches and removes an element. + * If the element is not found, 0 is returned. + * \param data Element used for comparison. + * \return The removed element, or 0 if not found. + */ +void* tommy_tree_remove(tommy_tree* tree, void* data); + +/** + * Searches an element in the tree. + * If the element is not found, 0 is returned. + * \param data Element used for comparison. + * \return The first element found, or 0 if none. + */ +void* tommy_tree_search(tommy_tree* tree, void* data); + +/** + * Searches an element in the tree with a specific comparison function. + * + * Like tommy_tree_search() but you can specify a different comparison function. + * Note that this function must define a suborder of the original one. + * + * The ::data argument will be the first argument of the comparison function, + * and it can be of a different type of the objects in the tree. + */ +void* tommy_tree_search_compare(tommy_tree* tree, tommy_compare_func* cmp, void* cmp_arg); + +/** + * Removes an element from the tree. + * You must already have the address of the element to remove. + * \return The tommy_node::data field of the node removed. + */ +void* tommy_tree_remove_existing(tommy_tree* tree, tommy_tree_node* node); + +/** + * Calls the specified function for each element in the tree. + * + * The elements are processed in order. + * + * You cannot add or remove elements from the inside of the callback, + * but can use it to deallocate them. + * + * \code + * tommy_tree tree; + * + * // initializes the tree + * tommy_tree_init(&tree, cmp); + * + * ... + * + * // creates an object + * struct object* obj = malloc(sizeof(struct object)); + * + * ... + * + * // insert it in the tree + * tommy_tree_insert(&tree, &obj->node, obj); + * + * ... + * + * // deallocates all the objects iterating the tree + * tommy_tree_foreach(&tree, free); + * \endcode + */ +void tommy_tree_foreach(tommy_tree* tree, tommy_foreach_func* func); + +/** + * Calls the specified function with an argument for each element in the tree. + */ +void tommy_tree_foreach_arg(tommy_tree* tree, tommy_foreach_arg_func* func, void* arg); + +/** + * Gets the number of elements. + */ +tommy_inline tommy_count_t tommy_tree_count(tommy_tree* tree) +{ + return tree->count; +} + +/** + * Gets the size of allocated memory. + * It includes the size of the ::tommy_tree_node of the stored elements. + */ +tommy_size_t tommy_tree_memory_usage(tommy_tree* tree); + +#endif + diff --git a/third-party/tommyds/tommytrie.c b/third-party/tommyds/tommytrie.c new file mode 100644 index 0000000..35ac219 --- /dev/null +++ b/third-party/tommyds/tommytrie.c @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommytrie.h" +#include "tommylist.h" + +#include <assert.h> /* for assert */ + +/******************************************************************************/ +/* trie */ + +/** + * Mask for the inner branches. + */ +#define TOMMY_TRIE_TREE_MASK (TOMMY_TRIE_TREE_MAX - 1) + +/** + * Shift for the first level of branches. + */ +#define TOMMY_TRIE_BUCKET_SHIFT (TOMMY_KEY_BIT - TOMMY_TRIE_BUCKET_BIT) + +/** + * Max number of levels. + */ +#define TOMMY_TRIE_LEVEL_MAX ((TOMMY_KEY_BIT - TOMMY_TRIE_BUCKET_BIT) / TOMMY_TRIE_TREE_BIT) + +/** + * Hashtrie tree. + * A tree contains TOMMY_TRIE_TREE_MAX ordered pointers to <null/node/tree>. + * + * Each tree level uses exactly TOMMY_TRIE_TREE_BIT bits from the key. + */ +struct tommy_trie_tree_struct { + tommy_trie_node* map[TOMMY_TRIE_TREE_MAX]; +}; +typedef struct tommy_trie_tree_struct tommy_trie_tree; + +/** + * Kinds of an trie node. + */ +#define TOMMY_TRIE_TYPE_NODE 0 /**< The node is of type ::tommy_trie_node. */ +#define TOMMY_TRIE_TYPE_TREE 1 /**< The node is of type ::tommy_trie_tree. */ + +/** + * Get and set pointer of trie nodes. + * + * The pointer type is stored in the lower bit. + */ +#define trie_get_type(ptr) (((tommy_uintptr_t)(ptr)) & 1) +#define trie_get_tree(ptr) ((tommy_trie_tree*)(((tommy_uintptr_t)(ptr)) - TOMMY_TRIE_TYPE_TREE)) +#define trie_set_tree(ptr) (void*)(((tommy_uintptr_t)(ptr)) + TOMMY_TRIE_TYPE_TREE) + +void tommy_trie_init(tommy_trie* trie, tommy_allocator* alloc) +{ + tommy_uint_t i; + + for (i = 0; i < TOMMY_TRIE_BUCKET_MAX; ++i) + trie->bucket[i] = 0; + + trie->count = 0; + trie->node_count = 0; + + trie->alloc = alloc; +} + +static void trie_bucket_insert(tommy_trie* trie, tommy_uint_t shift, tommy_trie_node** let_ptr, tommy_trie_node* insert, tommy_key_t key) +{ + tommy_trie_tree* tree; + tommy_trie_node* node; + void* ptr; + tommy_uint_t i; + tommy_uint_t j; + +recurse: + ptr = *let_ptr; + + /* if null, just insert the node */ + if (!ptr) { + /* setup the node as a list */ + tommy_list_insert_first(let_ptr, insert); + return; + } + + if (trie_get_type(ptr) == TOMMY_TRIE_TYPE_TREE) { + /* repeat the process one level down */ + let_ptr = &trie_get_tree(ptr)->map[(key >> shift) & TOMMY_TRIE_TREE_MASK]; + shift -= TOMMY_TRIE_TREE_BIT; + goto recurse; + } + + node = tommy_cast(tommy_trie_node*, ptr); + + /* if it's the same key, insert in the list */ + if (node->key == key) { + tommy_list_insert_tail_not_empty(node, insert); + return; + } + +expand: + /* convert to a tree */ + tree = tommy_cast(tommy_trie_tree*, tommy_allocator_alloc(trie->alloc)); + ++trie->node_count; + *let_ptr = tommy_cast(tommy_trie_node*, trie_set_tree(tree)); + + /* initialize it */ + for (i = 0; i < TOMMY_TRIE_TREE_MAX; ++i) + tree->map[i] = 0; + + /* get the position of the two elements */ + i = (node->key >> shift) & TOMMY_TRIE_TREE_MASK; + j = (key >> shift) & TOMMY_TRIE_TREE_MASK; + + /* if they don't collide */ + if (i != j) { + /* insert the already existing element */ + tree->map[i] = node; + + /* insert the new node */ + tommy_list_insert_first(&tree->map[j], insert); + return; + } + + /* expand one more level */ + let_ptr = &tree->map[i]; + shift -= TOMMY_TRIE_TREE_BIT; + goto expand; +} + +void tommy_trie_insert(tommy_trie* trie, tommy_trie_node* node, void* data, tommy_key_t key) +{ + tommy_trie_node** let_ptr; + + node->data = data; + node->key = key; + + let_ptr = &trie->bucket[key >> TOMMY_TRIE_BUCKET_SHIFT]; + + trie_bucket_insert(trie, TOMMY_TRIE_BUCKET_SHIFT, let_ptr, node, key); + + ++trie->count; +} + +static tommy_trie_node* trie_bucket_remove_existing(tommy_trie* trie, tommy_uint_t shift, tommy_trie_node** let_ptr, tommy_trie_node* remove, tommy_key_t key) +{ + tommy_trie_node* node; + tommy_trie_tree* tree; + void* ptr; + tommy_trie_node** let_back[TOMMY_TRIE_LEVEL_MAX + 1]; + tommy_uint_t level; + tommy_uint_t i; + tommy_uint_t count; + tommy_uint_t last; + + level = 0; +recurse: + ptr = *let_ptr; + + if (!ptr) + return 0; + + if (trie_get_type(ptr) == TOMMY_TRIE_TYPE_TREE) { + tree = trie_get_tree(ptr); + + /* save the path */ + let_back[level++] = let_ptr; + + /* go down one level */ + let_ptr = &tree->map[(key >> shift) & TOMMY_TRIE_TREE_MASK]; + shift -= TOMMY_TRIE_TREE_BIT; + + goto recurse; + } + + node = tommy_cast(tommy_trie_node*, ptr); + + /* if the node to remove is not specified */ + if (!remove) { + /* remove the first */ + remove = node; + + /* check if it's really the element to remove */ + if (remove->key != key) + return 0; + } + + tommy_list_remove_existing(let_ptr, remove); + + /* if the list is not empty, try to reduce */ + if (*let_ptr || !level) + return remove; + +reduce: + /* go one level up */ + let_ptr = let_back[--level]; + + tree = trie_get_tree(*let_ptr); + + /* check if there is only one child node */ + count = 0; + last = 0; + for (i = 0; i < TOMMY_TRIE_TREE_MAX; ++i) { + if (tree->map[i]) { + /* if we have a sub tree, we cannot reduce */ + if (trie_get_type(tree->map[i]) != TOMMY_TRIE_TYPE_NODE) + return remove; + /* if more than one node, we cannot reduce */ + if (++count > 1) + return remove; + last = i; + } + } + + /* here count is never 0, as we cannot have a tree with only one sub node */ + assert(count == 1); + + *let_ptr = tree->map[last]; + + tommy_allocator_free(trie->alloc, tree); + --trie->node_count; + + /* repeat until more level */ + if (level) + goto reduce; + + return remove; +} + +void* tommy_trie_remove(tommy_trie* trie, tommy_key_t key) +{ + tommy_trie_node* ret; + tommy_trie_node** let_ptr; + + let_ptr = &trie->bucket[key >> TOMMY_TRIE_BUCKET_SHIFT]; + + ret = trie_bucket_remove_existing(trie, TOMMY_TRIE_BUCKET_SHIFT, let_ptr, 0, key); + + if (!ret) + return 0; + + --trie->count; + + return ret->data; +} + +void* tommy_trie_remove_existing(tommy_trie* trie, tommy_trie_node* node) +{ + tommy_trie_node* ret; + tommy_key_t key = node->key; + tommy_trie_node** let_ptr; + + let_ptr = &trie->bucket[key >> TOMMY_TRIE_BUCKET_SHIFT]; + + ret = trie_bucket_remove_existing(trie, TOMMY_TRIE_BUCKET_SHIFT, let_ptr, node, key); + + /* the element removed must match the one passed */ + assert(ret == node); + + --trie->count; + + return ret->data; +} + +tommy_trie_node* tommy_trie_bucket(tommy_trie* trie, tommy_key_t key) +{ + tommy_trie_node* node; + void* ptr; + tommy_uint_t type; + tommy_uint_t shift; + + ptr = trie->bucket[key >> TOMMY_TRIE_BUCKET_SHIFT]; + + shift = TOMMY_TRIE_BUCKET_SHIFT; + +recurse: + if (!ptr) + return 0; + + type = trie_get_type(ptr); + + switch (type) { + case TOMMY_TRIE_TYPE_NODE : + node = tommy_cast(tommy_trie_node*, ptr); + if (node->key != key) + return 0; + return node; + default : + case TOMMY_TRIE_TYPE_TREE : + ptr = trie_get_tree(ptr)->map[(key >> shift) & TOMMY_TRIE_TREE_MASK]; + shift -= TOMMY_TRIE_TREE_BIT; + goto recurse; + } +} + +tommy_size_t tommy_trie_memory_usage(tommy_trie* trie) +{ + return tommy_trie_count(trie) * (tommy_size_t)sizeof(tommy_trie_node) + + trie->node_count * (tommy_size_t)TOMMY_TRIE_BLOCK_SIZE; +} + diff --git a/third-party/tommyds/tommytrie.h b/third-party/tommyds/tommytrie.h new file mode 100644 index 0000000..d82a5f2 --- /dev/null +++ b/third-party/tommyds/tommytrie.h @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Trie optimized for cache utilization. + * + * This trie is a standard implementation that stores elements in the order defined + * by the key. + * + * It needs an external allocator for the inner nodes in the trie. + * + * You can control the number of branches of each node using the ::TOMMY_TRIE_TREE_MAX + * define. More branches imply more speed, but a bigger memory occupation. + * + * Compared to ::tommy_trie_inplace you have to provide a ::tommy_allocator allocator. + * Note that the C malloc() is too slow to futfill this role. + * + * To initialize the trie you have to call tommy_allocator_init() to initialize + * the allocator, and tommy_trie_init() for the trie. + * + * \code + * tommy_allocator alloc; + * tommy_trie trie; + * + * tommy_allocator_init(&alloc, TOMMY_TRIE_BLOCK_SIZE, TOMMY_TRIE_BLOCK_SIZE); + * + * tommy_trie_init(&trie, &alloc); + * \endcode + * + * To insert elements in the trie you have to call tommy_trie_insert() for + * each element. + * In the insertion call you have to specify the address of the node, the + * address of the object, and the key value to use. + * The address of the object is used to initialize the tommy_node::data field + * of the node, and the key to initialize the tommy_node::key field. + * + * \code + * struct object { + * int value; + * // other fields + * tommy_node node; + * }; + * + * struct object* obj = malloc(sizeof(struct object)); // creates the object + * + * obj->value = ...; // initializes the object + * + * tommy_trie_insert(&trie, &obj->node, obj, obj->value); // inserts the object + * \endcode + * + * To find and element in the trie you have to call tommy_trie_search() providing + * the key to search. + * + * \code + * int value_to_find = 1; + * struct object* obj = tommy_trie_search(&trie, value_to_find); + * if (!obj) { + * // not found + * } else { + * // found + * } + * \endcode + * + * To iterate over all the elements in the trie with the same key, you have to + * use tommy_trie_bucket() and follow the tommy_node::next pointer until NULL. + * + * \code + * int value_to_find = 1; + * tommy_node* i = tommy_trie_bucket(&trie, value_to_find); + * while (i) { + * struct object* obj = i->data; // gets the object pointer + * + * printf("%d\n", obj->value); // process the object + * + * i = i->next; // goes to the next element + * } + * \endcode + * + * To remove an element from the trie you have to call tommy_trie_remove() + * providing the key to search and remove. + * + * \code + * struct object* obj = tommy_trie_remove(&trie, value_to_remove); + * if (obj) { + * free(obj); // frees the object allocated memory + * } + * \endcode + * + * To destroy the trie you have to remove all the elements, and deinitialize + * the allocator using tommy_allocator_done(). + * + * \code + * tommy_allocator_done(&alloc); + * \endcode + * + * Note that you cannot iterate over all the elements in the trie using the + * trie itself. You have to insert all the elements also in a ::tommy_list, + * and use the list to iterate. See the \ref multiindex example for more detail. + */ + +#ifndef __TOMMYTRIE_H +#define __TOMMYTRIE_H + +#include "tommytypes.h" +#include "tommyalloc.h" + +/******************************************************************************/ +/* trie */ + +/** + * Number of branches on each inner node. It must be a power of 2. + * Suggested values are 8, 16 and 32. + * Any inner node, excluding leafs, contains a pointer to each branch. + * + * The default size is choosen to exactly fit a typical cache line of 64 bytes. + */ +#define TOMMY_TRIE_TREE_MAX (64 / sizeof(void*)) + +/** + * Trie block size. + * You must use this value to initialize the allocator. + */ +#define TOMMY_TRIE_BLOCK_SIZE (TOMMY_TRIE_TREE_MAX * sizeof(void*)) + +/** \internal + * Number of bits for each branch. + */ +#define TOMMY_TRIE_TREE_BIT TOMMY_ILOG2(TOMMY_TRIE_TREE_MAX) + +/** \internal + * Number of bits of the first level. + */ +#define TOMMY_TRIE_BUCKET_BIT ((TOMMY_KEY_BIT % TOMMY_TRIE_TREE_BIT) + TOMMY_TRIE_TREE_BIT) + +/** \internal + * Number of branches of the first level. + * It's like a inner branch, but bigger to get any remainder bits. + */ +#define TOMMY_TRIE_BUCKET_MAX (1 << TOMMY_TRIE_BUCKET_BIT) + +/** + * Trie node. + * This is the node that you have to include inside your objects. + */ +typedef tommy_node tommy_trie_node; + +/** + * Trie container type. + * \note Don't use internal fields directly, but access the container only using functions. + */ +typedef struct tommy_trie_struct { + tommy_trie_node* bucket[TOMMY_TRIE_BUCKET_MAX]; /**< First tree level. */ + tommy_count_t count; /**< Number of elements. */ + tommy_count_t node_count; /**< Number of nodes. */ + tommy_allocator* alloc; /**< Allocator for internal nodes. */ +} tommy_trie; + +/** + * Initializes the trie. + * You have to provide an allocator initialized with *both* the size and align with TOMMY_TRIE_BLOCK_SIZE. + * You can share this allocator with other tries. + * + * The tries is completely allocated through the allocator, and it doesn't need to be deinitialized. + * \param alloc Allocator initialized with *both* the size and align with TOMMY_TRIE_BLOCK_SIZE. + */ +void tommy_trie_init(tommy_trie* trie, tommy_allocator* alloc); + +/** + * Inserts an element in the trie. + * You have to provide the pointer of the node embedded into the object, + * the pointer to the object and the key to use. + * \param node Pointer to the node embedded into the object to insert. + * \param data Pointer to the object to insert. + * \param key Key to use to insert the object. + */ +void tommy_trie_insert(tommy_trie* trie, tommy_trie_node* node, void* data, tommy_key_t key); + +/** + * Searches and removes the first element with the specified key. + * If the element is not found, 0 is returned. + * If more equal elements are present, the first one is removed. + * This operation is faster than calling tommy_trie_bucket() and tommy_trie_remove_existing() separately. + * \param key Key of the element to find and remove. + * \return The removed element, or 0 if not found. + */ +void* tommy_trie_remove(tommy_trie* trie, tommy_key_t key); + +/** + * Gets the bucket of the specified key. + * The bucket is guaranteed to contain ALL and ONLY the elements with the specified key. + * You can access elements in the bucket following the ::next pointer until 0. + * \param key Key of the element to find. + * \return The head of the bucket, or 0 if empty. + */ +tommy_trie_node* tommy_trie_bucket(tommy_trie* trie, tommy_key_t key); + +/** + * Searches an element in the trie. + * You have to provide the key of the element you want to find. + * If more elements with the same key are present, the first one is returned. + * \param key Key of the element to find. + * \return The first element found, or 0 if none. + */ +tommy_inline void* tommy_trie_search(tommy_trie* trie, tommy_key_t key) +{ + tommy_trie_node* i = tommy_trie_bucket(trie, key); + + if (!i) + return 0; + + return i->data; +} + +/** + * Removes an element from the trie. + * You must already have the address of the element to remove. + * \return The tommy_node::data field of the node removed. + */ +void* tommy_trie_remove_existing(tommy_trie* trie, tommy_trie_node* node); + +/** + * Gets the number of elements. + */ +tommy_inline tommy_count_t tommy_trie_count(tommy_trie* trie) +{ + return trie->count; +} + +/** + * Gets the size of allocated memory. + * It includes the size of the ::tommy_trie_node of the stored elements. + */ +tommy_size_t tommy_trie_memory_usage(tommy_trie* trie); + +#endif + diff --git a/third-party/tommyds/tommytrieinp.c b/third-party/tommyds/tommytrieinp.c new file mode 100644 index 0000000..26ec2c3 --- /dev/null +++ b/third-party/tommyds/tommytrieinp.c @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tommytrieinp.h" + +#include <assert.h> /* for assert */ + +/******************************************************************************/ +/* trie_inplace */ + +/** + * Mask for the inner branches. + */ +#define TOMMY_TRIE_INPLACE_TREE_MASK (TOMMY_TRIE_INPLACE_TREE_MAX - 1) + +/** + * Shift for the first level of branches. + */ +#define TOMMY_TRIE_INPLACE_BUCKET_SHIFT (TOMMY_KEY_BIT - TOMMY_TRIE_INPLACE_BUCKET_BIT) + +/** + * Create a new list with a single element. + */ +tommy_inline tommy_trie_inplace_node* tommy_trie_inplace_list_insert_first(tommy_trie_inplace_node* node) +{ + /* one element "circular" prev list */ + node->prev = node; + + /* one element "0 terminated" next list */ + node->next = 0; + + return node; +} + +/** + * Add an element to an existing list. + * \note The element is inserted at the end of the list. + */ +tommy_inline void tommy_trie_inplace_list_insert_tail_not_empty(tommy_trie_inplace_node* head, tommy_trie_inplace_node* node) +{ + /* insert in the list in the last position */ + + /* insert in the "circular" prev list */ + node->prev = head->prev; + head->prev = node; + + /* insert in the "0 terminated" next list */ + node->next = 0; + node->prev->next = node; +} + +/** + * Remove an element from the list. + */ +tommy_inline void tommy_trie_inplace_list_remove(tommy_trie_inplace_node** let_ptr, tommy_trie_inplace_node* node) +{ + tommy_trie_inplace_node* head = *let_ptr; + + /* remove from the "circular" prev list */ + if (node->next) + node->next->prev = node->prev; + else + head->prev = node->prev; /* the last */ + + /* remove from the "0 terminated" next list */ + if (head == node) + *let_ptr = node->next; /* the new first */ + else + node->prev->next = node->next; +} + +void tommy_trie_inplace_init(tommy_trie_inplace* trie_inplace) +{ + tommy_uint_t i; + + for (i = 0; i < TOMMY_TRIE_INPLACE_BUCKET_MAX; ++i) + trie_inplace->bucket[i] = 0; + + trie_inplace->count = 0; +} + +static void trie_inplace_bucket_insert(tommy_uint_t shift, tommy_trie_inplace_node** let_ptr, tommy_trie_inplace_node* insert, tommy_key_t key) +{ + tommy_trie_inplace_node* node; + + node = *let_ptr; + while (node && node->key != key) { + let_ptr = &node->map[(key >> shift) & TOMMY_TRIE_INPLACE_TREE_MASK]; + node = *let_ptr; + shift -= TOMMY_TRIE_INPLACE_TREE_BIT; + } + + /* if null, just insert the node */ + if (!node) { + /* setup the node as a list */ + *let_ptr = tommy_trie_inplace_list_insert_first(insert); + } else { + /* if it's the same key, insert in the list */ + tommy_trie_inplace_list_insert_tail_not_empty(node, insert); + } +} + +void tommy_trie_inplace_insert(tommy_trie_inplace* trie_inplace, tommy_trie_inplace_node* node, void* data, tommy_key_t key) +{ + tommy_trie_inplace_node** let_ptr; + tommy_uint_t i; + + node->data = data; + node->key = key; + /* clear the child pointers */ + for (i = 0; i < TOMMY_TRIE_INPLACE_TREE_MAX; ++i) + node->map[i] = 0; + + let_ptr = &trie_inplace->bucket[key >> TOMMY_TRIE_INPLACE_BUCKET_SHIFT]; + + trie_inplace_bucket_insert(TOMMY_TRIE_INPLACE_BUCKET_SHIFT, let_ptr, node, key); + + ++trie_inplace->count; +} + +static tommy_trie_inplace_node* trie_inplace_bucket_remove(tommy_uint_t shift, tommy_trie_inplace_node** let_ptr, tommy_trie_inplace_node* remove, tommy_key_t key) +{ + tommy_trie_inplace_node* node; + int i; + tommy_trie_inplace_node** leaf_let_ptr; + tommy_trie_inplace_node* leaf; + + node = *let_ptr; + while (node && node->key != key) { + let_ptr = &node->map[(key >> shift) & TOMMY_TRIE_INPLACE_TREE_MASK]; + node = *let_ptr; + shift -= TOMMY_TRIE_INPLACE_TREE_BIT; + } + + if (!node) + return 0; + + /* if the node to remove is not specified */ + if (!remove) + remove = node; /* remove the first */ + + tommy_trie_inplace_list_remove(let_ptr, remove); + + /* if not change in the node, nothing more to do */ + if (*let_ptr == node) + return remove; + + /* if we have a substitute */ + if (*let_ptr != 0) { + /* copy the child pointers to the new one */ + node = *let_ptr; + for (i = 0; i < TOMMY_TRIE_INPLACE_TREE_MAX; ++i) + node->map[i] = remove->map[i]; + + return remove; + } + + /* find a leaf */ + leaf_let_ptr = 0; + leaf = remove; + + /* search backward, statistically we have more zeros than ones */ + i = TOMMY_TRIE_INPLACE_TREE_MAX - 1; + while (i >= 0) { + if (leaf->map[i]) { + leaf_let_ptr = &leaf->map[i]; + leaf = *leaf_let_ptr; + i = TOMMY_TRIE_INPLACE_TREE_MAX - 1; + continue; + } + --i; + } + + /* if it's itself a leaf */ + if (!leaf_let_ptr) + return remove; + + /* remove the leaf */ + *leaf_let_ptr = 0; + + /* copy the child pointers */ + for (i = 0; i < TOMMY_TRIE_INPLACE_TREE_MAX; ++i) + leaf->map[i] = remove->map[i]; + + /* put it in place */ + *let_ptr = leaf; + + return remove; +} + +void* tommy_trie_inplace_remove(tommy_trie_inplace* trie_inplace, tommy_key_t key) +{ + tommy_trie_inplace_node* ret; + tommy_trie_inplace_node** let_ptr; + + let_ptr = &trie_inplace->bucket[key >> TOMMY_TRIE_INPLACE_BUCKET_SHIFT]; + + ret = trie_inplace_bucket_remove(TOMMY_TRIE_INPLACE_BUCKET_SHIFT, let_ptr, 0, key); + + if (!ret) + return 0; + + --trie_inplace->count; + + return ret->data; +} + +void* tommy_trie_inplace_remove_existing(tommy_trie_inplace* trie_inplace, tommy_trie_inplace_node* node) +{ + tommy_trie_inplace_node* ret; + tommy_key_t key = node->key; + tommy_trie_inplace_node** let_ptr; + + let_ptr = &trie_inplace->bucket[key >> TOMMY_TRIE_INPLACE_BUCKET_SHIFT]; + + ret = trie_inplace_bucket_remove(TOMMY_TRIE_INPLACE_BUCKET_SHIFT, let_ptr, node, key); + + /* the element removed must match the one passed */ + assert(ret == node); + + --trie_inplace->count; + + return ret->data; +} + +tommy_trie_inplace_node* tommy_trie_inplace_bucket(tommy_trie_inplace* trie_inplace, tommy_key_t key) +{ + tommy_trie_inplace_node* node; + tommy_uint_t shift; + + node = trie_inplace->bucket[key >> TOMMY_TRIE_INPLACE_BUCKET_SHIFT]; + shift = TOMMY_TRIE_INPLACE_BUCKET_SHIFT; + + while (node && node->key != key) { + node = node->map[(key >> shift) & TOMMY_TRIE_INPLACE_TREE_MASK]; + shift -= TOMMY_TRIE_INPLACE_TREE_BIT; + } + + return node; +} + +tommy_size_t tommy_trie_inplace_memory_usage(tommy_trie_inplace* trie_inplace) +{ + return tommy_trie_inplace_count(trie_inplace) * (tommy_size_t)sizeof(tommy_trie_inplace_node); +} + diff --git a/third-party/tommyds/tommytrieinp.h b/third-party/tommyds/tommytrieinp.h new file mode 100644 index 0000000..9448a50 --- /dev/null +++ b/third-party/tommyds/tommytrieinp.h @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Inplace trie. + * + * This trie is a inplace implementation not needing any external allocation. + * + * Elements are not stored in order, like ::tommy_trie, because some elements + * should be used to represent the inner nodes in the trie. + * + * You can control the number of branches of each node using the ::TOMMY_TRIE_INPLACE_TREE_MAX define. + * More branches imply more speed, but a bigger memory occupation. + * + * Compared to ::tommy_trie you should use a lower number of branches to limit the unused memory + * occupation of the leaf nodes. This imply a lower speed, but without the need of an external allocator. + * + * To initialize the trie you have to call tommy_trie_inplace_init(). + * + * \code + * tommy_trie_inplace trie_inplace; + * + * tommy_trie_inplace_init(&trie_inplace); + * \endcode + * + * To insert elements in the trie you have to call tommy_trie_inplace_insert() for + * each element. + * In the insertion call you have to specify the address of the node, the + * address of the object, and the key value to use. + * The address of the object is used to initialize the tommy_node::data field + * of the node, and the key to initialize the tommy_node::key field. + * + * \code + * struct object { + * int value; + * // other fields + * tommy_node node; + * }; + * + * struct object* obj = malloc(sizeof(struct object)); // creates the object + * + * obj->value = ...; // initializes the object + * + * tommy_trie_inplace_insert(&trie_inplace, &obj->node, obj, obj->value); // inserts the object + * \endcode + * + * To find and element in the trie you have to call tommy_trie_inplace_search() providing + * the key to search. + * + * \code + * int value_to_find = 1; + * struct object* obj = tommy_trie_inplace_search(&trie_inplace, value_to_find); + * if (!obj) { + * // not found + * } else { + * // found + * } + * \endcode + * + * To iterate over all the elements in the trie with the same key, you have to + * use tommy_trie_inplace_bucket() and follow the tommy_node::next pointer until NULL. + * + * \code + * int value_to_find = 1; + * tommy_node* i = tommy_trie_inplace_bucket(&trie_inplace, value_to_find); + * while (i) { + * struct object* obj = i->data; // gets the object pointer + * + * printf("%d\n", obj->value); // process the object + * + * i = i->next; // goes to the next element + * } + * \endcode + * + * To remove an element from the trie you have to call tommy_trie_inplace_remove() + * providing the key to search and remove. + * + * \code + * struct object* obj = tommy_trie_inplace_remove(&trie_inplace, value_to_remove); + * if (obj) { + * free(obj); // frees the object allocated memory + * } + * \endcode + * + * To destroy the trie you have only to remove all the elements, as the trie is + * completely inplace and it doesn't allocate memory. + * + * Note that you cannot iterate over all the elements in the trie using the + * trie itself. You have to insert all the elements also in a ::tommy_list, + * and use the list to iterate. See the \ref multiindex example for more detail. + */ + +#ifndef __TOMMYTRIEINP_H +#define __TOMMYTRIEINP_H + +#include "tommytypes.h" + +/******************************************************************************/ +/* trie_inplace */ + +/** + * Number of branches on each node. It must be a power of 2. + * Suggested values are 2, 4 and 8. + * Any node, including leafs, contains a pointer to each branch. + */ +#define TOMMY_TRIE_INPLACE_TREE_MAX 4 + +/** \internal + * Number of bits for each branch. + */ +#define TOMMY_TRIE_INPLACE_TREE_BIT TOMMY_ILOG2(TOMMY_TRIE_INPLACE_TREE_MAX) + +/** \internal + * Number of bits of the first level. + */ +#define TOMMY_TRIE_INPLACE_BUCKET_BIT ((TOMMY_KEY_BIT % TOMMY_TRIE_INPLACE_TREE_BIT) + 3 * TOMMY_TRIE_INPLACE_TREE_BIT) + +/** \internal + * Number of branches of the first level. + * It's like a inner branch, but bigger to get any remainder bits. + */ +#define TOMMY_TRIE_INPLACE_BUCKET_MAX (1 << TOMMY_TRIE_INPLACE_BUCKET_BIT) + +/** + * Trie node. + * This is the node that you have to include inside your objects. + */ +typedef struct tommy_trie_inplace_node_struct { + struct tommy_trie_inplace_node_struct* next; /**< Next element. 0 if it's the last. */ + struct tommy_trie_inplace_node_struct* prev; /**< Circular previous element. */ + void* data; /**< Pointer to the data. */ + tommy_key_t key; /**< Used to store the key or the hash. */ + struct tommy_trie_inplace_node_struct* map[TOMMY_TRIE_INPLACE_TREE_MAX]; /** Branches of the node. */ +} tommy_trie_inplace_node; + +/** + * Trie container type. + * \note Don't use internal fields directly, but access the container only using functions. + */ +typedef struct tommy_trie_inplace_struct { + tommy_trie_inplace_node* bucket[TOMMY_TRIE_INPLACE_BUCKET_MAX]; /**< First tree level. */ + tommy_count_t count; /**< Number of elements. */ +} tommy_trie_inplace; + +/** + * Initializes the trie. + * + * The tries is completely inplace, and it doesn't need to be deinitialized. + */ +void tommy_trie_inplace_init(tommy_trie_inplace* trie_inplace); + +/** + * Inserts an element in the trie. + */ +void tommy_trie_inplace_insert(tommy_trie_inplace* trie_inplace, tommy_trie_inplace_node* node, void* data, tommy_key_t key); + +/** + * Searches and removes the first element with the specified key. + * If the element is not found, 0 is returned. + * If more equal elements are present, the first one is removed. + * This operation is faster than calling tommy_trie_inplace_bucket() and tommy_trie_inplace_remove_existing() separately. + * \param key Key of the element to find and remove. + * \return The removed element, or 0 if not found. + */ +void* tommy_trie_inplace_remove(tommy_trie_inplace* trie_inplace, tommy_key_t key); + +/** + * Gets the bucket of the specified key. + * The bucket is guaranteed to contain ALL and ONLY the elements with the specified key. + * You can access elements in the bucket following the ::next pointer until 0. + * \param key Key of the element to find. + * \return The head of the bucket, or 0 if empty. + */ +tommy_trie_inplace_node* tommy_trie_inplace_bucket(tommy_trie_inplace* trie_inplace, tommy_key_t key); + +/** + * Searches an element in the trie. + * You have to provide the key of the element you want to find. + * If more elements with the same key are present, the first one is returned. + * \param key Key of the element to find. + * \return The first element found, or 0 if none. + */ +tommy_inline void* tommy_trie_inplace_search(tommy_trie_inplace* trie_inplace, tommy_key_t key) +{ + tommy_trie_inplace_node* i = tommy_trie_inplace_bucket(trie_inplace, key); + + if (!i) + return 0; + + return i->data; +} + +/** + * Removes an element from the trie. + * You must already have the address of the element to remove. + * \return The tommy_node::data field of the node removed. + */ +void* tommy_trie_inplace_remove_existing(tommy_trie_inplace* trie_inplace, tommy_trie_inplace_node* node); + +/** + * Gets the number of elements. + */ +tommy_inline tommy_count_t tommy_trie_inplace_count(tommy_trie_inplace* trie_inplace) +{ + return trie_inplace->count; +} + +/** + * Gets the size of allocated memory. + * It includes the size of the ::tommy_inplace_node of the stored elements. + */ +tommy_size_t tommy_trie_inplace_memory_usage(tommy_trie_inplace* trie_inplace); + +#endif + diff --git a/third-party/tommyds/tommytypes.h b/third-party/tommyds/tommytypes.h new file mode 100644 index 0000000..a52d910 --- /dev/null +++ b/third-party/tommyds/tommytypes.h @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Generic types. + */ + +#ifndef __TOMMYTYPES_H +#define __TOMMYTYPES_H + +/******************************************************************************/ +/* types */ + +#include <stddef.h> + +#if defined(_MSC_VER) +typedef unsigned tommy_uint32_t; /**< Generic uint32_t type. */ +typedef unsigned _int64 tommy_uint64_t; /**< Generic uint64_t type. */ +typedef size_t tommy_uintptr_t; /**< Generic uintptr_t type. */ +#else +#include <stdint.h> +typedef uint32_t tommy_uint32_t; /**< Generic uint32_t type. */ +typedef uint64_t tommy_uint64_t; /**< Generic uint64_t type. */ +typedef uintptr_t tommy_uintptr_t; /**< Generic uintptr_t type. */ +#endif +typedef size_t tommy_size_t; /**< Generic size_t type. */ +typedef ptrdiff_t tommy_ptrdiff_t; /**< Generic ptrdiff_t type. */ +typedef int tommy_bool_t; /**< Generic boolean type. */ + +/** + * Generic unsigned integer type. + * + * It has no specific size, as is used to store only small values. + * To make the code more efficient, a full 32 bit integer is used. + */ +typedef tommy_uint32_t tommy_uint_t; + +/** + * Generic unsigned integer for counting objects. + * + * TommyDS doesn't support more than 2^32-1 objects. + */ +typedef tommy_uint32_t tommy_count_t; + +/** \internal + * Type cast required for the C++ compilation. + * When compiling in C++ we cannot convert a void* pointer to another pointer. + * In such case we need an explicit cast. + */ +#ifdef __cplusplus +#define tommy_cast(type, value) static_cast<type>(value) +#else +#define tommy_cast(type, value) (value) +#endif + +/******************************************************************************/ +/* heap */ + +/* by default uses malloc/calloc/realloc/free */ + +/** + * Generic malloc(), calloc(), realloc() and free() functions. + * Redefine them to what you need. By default they map to the C malloc(), calloc(), realloc() and free(). + */ +#if !defined(tommy_malloc) || !defined(tommy_calloc) || !defined(tommy_realloc) || !defined(tommy_free) +#include "rtrlib/lib/alloc_utils_private.h" +#endif +#if !defined(tommy_malloc) +#define tommy_malloc lrtr_malloc +#endif +#if !defined(tommy_calloc) +#define tommy_calloc lrtr_calloc +#endif +#if !defined(tommy_realloc) +#define tommy_realloc lrtr_realloc +#endif +#if !defined(tommy_free) +#define tommy_free lrtr_free +#endif + +/******************************************************************************/ +/* modificators */ + +/** \internal + * Definition of the inline keyword if available. + */ +#if !defined(tommy_inline) +#if defined(_MSC_VER) || defined(__GNUC__) +#define tommy_inline static __inline +#else +#define tommy_inline static +#endif +#endif + +/** \internal + * Definition of the restrict keyword if available. + */ +#if !defined(tommy_restrict) +#if __STDC_VERSION__ >= 199901L +#define tommy_restrict restrict +#elif defined(_MSC_VER) || defined(__GNUC__) +#define tommy_restrict __restrict +#else +#define tommy_restrict +#endif +#endif + +/** \internal + * Hints the compiler that a condition is likely true. + */ +#if !defined(tommy_likely) +#if defined(__GNUC__) +#define tommy_likely(x) __builtin_expect(!!(x), 1) +#else +#define tommy_likely(x) (x) +#endif +#endif + +/** \internal + * Hints the compiler that a condition is likely false. + */ +#if !defined(tommy_unlikely) +#if defined(__GNUC__) +#define tommy_unlikely(x) __builtin_expect(!!(x), 0) +#else +#define tommy_unlikely(x) (x) +#endif +#endif + +/******************************************************************************/ +/* key */ + +/** + * Key type used in indexed data structures to store the key or the hash value. + */ +typedef tommy_uint32_t tommy_key_t; + +/** + * Bits into the ::tommy_key_t type. + */ +#define TOMMY_KEY_BIT (sizeof(tommy_key_t) * 8) + +/******************************************************************************/ +/* node */ + +/** + * Data structure node. + * This node type is shared between all the data structures and used to store some + * info directly into the objects you want to store. + * + * A typical declaration is: + * \code + * struct object { + * tommy_node node; + * // other fields + * }; + * \endcode + */ +typedef struct tommy_node_struct { + /** + * Next node. + * The tail node has it at 0, like a 0 terminated list. + */ + struct tommy_node_struct* next; + + /** + * Previous node. + * The head node points to the tail node, like a circular list. + */ + struct tommy_node_struct* prev; + + /** + * Pointer to the object containing the node. + * This field is initialized when inserting nodes into a data structure. + */ + void* data; + + /** + * Key used to store the node. + * With hashtables this field is used to store the hash value. + * With lists this field is not used. + */ + tommy_key_t key; +} tommy_node; + +/******************************************************************************/ +/* compare */ + +/** + * Compare function for elements. + * \param obj_a Pointer to the first object to compare. + * \param obj_b Pointer to the second object to compare. + * \return <0 if the first element is less than the second, ==0 equal, >0 if greather. + * + * This function is like the C strcmp(). + * + * \code + * struct object { + * tommy_node node; + * int value; + * }; + * + * int compare(const void* obj_a, const void* obj_b) + * { + * if (((const struct object*)obj_a)->value < ((const struct object*)obj_b)->value) + * return -1; + * if (((const struct object*)obj_a)->value > ((const struct object*)obj_b)->value) + * return 1; + * return 0; + * } + * + * tommy_list_sort(&list, compare); + * \endcode + * + */ +typedef int tommy_compare_func(const void* obj_a, const void* obj_b); + +/** + * Search function for elements. + * \param arg Pointer to the value to search as passed at the search function. + * \param obj Pointer to the object to compare to. + * \return ==0 if the value matches the element. !=0 if different. + * + * The first argument is a pointer to the value to search exactly + * as it's passed at the search function called. + * The second argument is a pointer to the object inside the hashtable to compare. + * + * The return value has to be 0 if the values are equal. != 0 if they are different. + * + * \code + * struct object { + * tommy_node node; + * int value; + * }; + * + * int compare(const void* arg, const void* obj) + * { + * const int* value_to_find = arg; + * const struct object* object_to_compare = obj; + * + * return *value_to_find != object_to_compare->value; + * } + * + * int value_to_find = 1; + * struct object* obj = tommy_hashtable_search(&hashtable, compare, &value_to_find, tommy_inthash_u32(value_to_find)); + * if (!obj) { + * // not found + * } else { + * // found + * } + * \endcode + * + */ +typedef int tommy_search_func(const void* arg, const void* obj); + +/** + * Foreach function. + * \param obj Pointer to the object to iterate. + * + * A typical example is to use free() to deallocate all the objects in a list. + * \code + * tommy_list_foreach(&list, (tommy_foreach_func*)free); + * \endcode + */ +typedef void tommy_foreach_func(void* obj); + +/** + * Foreach function with an argument. + * \param arg Pointer to a generic argument. + * \param obj Pointer to the object to iterate. + */ +typedef void tommy_foreach_arg_func(void* arg, void* obj); + +/******************************************************************************/ +/* bit hacks */ + +#if defined(_MSC_VER) && !defined(__cplusplus) +#include <intrin.h> +#pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) +#endif + +/** \internal + * Integer log2 for constants. + * You can use it only for exact power of 2 up to 256. + */ +#define TOMMY_ILOG2(value) ((value) == 256 ? 8 : (value) == 128 ? 7 : (value) == 64 ? 6 : (value) == 32 ? 5 : (value) == 16 ? 4 : (value) == 8 ? 3 : (value) == 4 ? 2 : (value) == 2 ? 1 : 0) + +/** + * Bit scan reverse or integer log2. + * Return the bit index of the most significant 1 bit. + * + * If no bit is set, the result is undefined. + * To force a return 0 in this case, you can use tommy_ilog2_u32(value | 1). + * + * Other interesting ways for bitscan are at: + * + * Bit Twiddling Hacks + * http://graphics.stanford.edu/~seander/bithacks.html + * + * Chess Programming BitScan + * http://chessprogramming.wikispaces.com/BitScan + * + * \param value Value to scan. 0 is not allowed. + * \return The index of the most significant bit set. + */ +tommy_inline tommy_uint_t tommy_ilog2_u32(tommy_uint32_t value) +{ +#if defined(_MSC_VER) + unsigned long count; + _BitScanReverse(&count, value); + return count; +#elif defined(__GNUC__) + /* + * GCC implements __builtin_clz(x) as "__builtin_clz(x) = bsr(x) ^ 31" + * + * Where "x ^ 31 = 31 - x", but gcc does not optimize "31 - __builtin_clz(x)" to bsr(x), + * but generates 31 - (bsr(x) xor 31). + * + * So we write "__builtin_clz(x) ^ 31" instead of "31 - __builtin_clz(x)", + * to allow the double xor to be optimized out. + */ + return __builtin_clz(value) ^ 31; +#else + /* Find the log base 2 of an N-bit integer in O(lg(N)) operations with multiply and lookup */ + /* from http://graphics.stanford.edu/~seander/bithacks.html */ + static unsigned char TOMMY_DE_BRUIJN_INDEX_ILOG2[32] = { + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 + }; + + value |= value >> 1; + value |= value >> 2; + value |= value >> 4; + value |= value >> 8; + value |= value >> 16; + + return TOMMY_DE_BRUIJN_INDEX_ILOG2[(tommy_uint32_t)(value * 0x07C4ACDDU) >> 27]; +#endif +} + +/** + * Bit scan forward or trailing zero count. + * Return the bit index of the least significant 1 bit. + * + * If no bit is set, the result is undefined. + * \param value Value to scan. 0 is not allowed. + * \return The index of the least significant bit set. + */ +tommy_inline tommy_uint_t tommy_ctz_u32(tommy_uint32_t value) +{ +#if defined(_MSC_VER) + unsigned long count; + _BitScanForward(&count, value); + return count; +#elif defined(__GNUC__) + return __builtin_ctz(value); +#else + /* Count the consecutive zero bits (trailing) on the right with multiply and lookup */ + /* from http://graphics.stanford.edu/~seander/bithacks.html */ + static const unsigned char TOMMY_DE_BRUIJN_INDEX_CTZ[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + + return TOMMY_DE_BRUIJN_INDEX_CTZ[(tommy_uint32_t)(((value & - value) * 0x077CB531U)) >> 27]; +#endif +} + +/** + * Rounds up to the next power of 2. + * For the value 0, the result is undefined. + * \return The smallest power of 2 not less than the specified value. + */ +tommy_inline tommy_uint32_t tommy_roundup_pow2_u32(tommy_uint32_t value) +{ + /* Round up to the next highest power of 2 */ + /* from http://graphics.stanford.edu/~seander/bithacks.html */ + + --value; + value |= value >> 1; + value |= value >> 2; + value |= value >> 4; + value |= value >> 8; + value |= value >> 16; + ++value; + + return value; +} + +/** + * Check if the specified word has a byte at 0. + * \return 0 or 1. + */ +tommy_inline int tommy_haszero_u32(tommy_uint32_t value) +{ + return ((value - 0x01010101) & ~value & 0x80808080) != 0; +} +#endif + |