Diffstat (limited to 'src/kmk/strcache2.c')
-rw-r--r--  src/kmk/strcache2.c  | 1327
1 file changed, 1327 insertions(+), 0 deletions(-)
diff --git a/src/kmk/strcache2.c b/src/kmk/strcache2.c
new file mode 100644
index 0000000..c2bad20
--- /dev/null
+++ b/src/kmk/strcache2.c
@@ -0,0 +1,1327 @@
+/* $Id: strcache2.c 3140 2018-03-14 21:28:10Z bird $ */
+/** @file
+ * strcache2 - New string cache.
+ */
+
+/*
+ * Copyright (c) 2008-2010 knut st. osmundsen <bird-kBuild-spamx@anduin.net>
+ *
+ * This file is part of kBuild.
+ *
+ * kBuild is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * kBuild is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with kBuild. If not, see <http://www.gnu.org/licenses/>
+ *
+ */
+
+/*******************************************************************************
+* Header Files *
+*******************************************************************************/
+#include "makeint.h"
+#include "strcache2.h"
+
+#include <assert.h>
+
+#include "debug.h"
+
+#ifdef _MSC_VER
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef signed char int8_t;
+typedef signed short int16_t;
+typedef signed int int32_t;
+#else
+# include <stdint.h>
+#endif
+
+#ifdef WINDOWS32
+# include <io.h>
+# include <process.h>
+# include <Windows.h>
+# define PARSE_IN_WORKER
+#endif
+
+#ifdef __OS2__
+# include <sys/fmutex.h>
+#endif
+
+#ifdef HAVE_PTHREAD
+# include <pthread.h>
+#endif
+
+
+/*******************************************************************************
+* Defined Constants And Macros *
+*******************************************************************************/
+/* The default size of a memory segment (1MB). */
+#define STRCACHE2_SEG_SIZE (1024U*1024U)
+/* The default hash table shift (the hash size given as a power of two). */
+#define STRCACHE2_HASH_SHIFT 16
+/** Does the modding / masking of a hash number into an index. */
+#ifdef STRCACHE2_USE_MASK
+# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
+#else
+# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
+#endif
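+
+/* Illustrative example (the numbers follow from the defaults above): with the
+   default 65536 slot table the mask variant computes (hash & 0xffff) while
+   the default variant divides by the prime 65521, so the hash 0x12345678
+   lands in slot 0x5678 (22136) with the mask and in slot 26515 with the
+   prime divisor. */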
+
+# if ( defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
+ || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)) \
+ && !defined(GCC_ADDRESS_SANITIZER)
+# define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
+# else
+ /* (endian doesn't matter) */
+# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
+ | (((const uint8_t *)(ptr))[1]) )
+# endif
+
+
+/*******************************************************************************
+* Global Variables *
+*******************************************************************************/
+/* List of initialized string caches. */
+static struct strcache2 *strcache_head;
+
+
+#ifndef STRCACHE2_USE_MASK
+/** Finds the closest prime number below the given power of two (or returns
+ * something else useful if the shift isn't supported). */
+MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
+{
+ switch (shift)
+ {
+ case 5: return 31;
+ case 6: return 61;
+ case 7: return 127;
+ case 8: return 251;
+ case 9: return 509;
+ case 10: return 1021;
+ case 11: return 2039;
+ case 12: return 4093;
+ case 13: return 8191;
+ case 14: return 16381;
+ case 15: return 32749;
+ case 16: return 65521;
+ case 17: return 131063;
+ case 18: return 262139;
+ case 19: return 524269;
+ case 20: return 1048573;
+ case 21: return 2097143;
+ case 22: return 4194301;
+ case 23: return 8388593;
+
+ default:
+ assert (0);
+ return (1 << shift) - 1;
+ }
+}
+#endif
+
+/* The following is a bit of an experiment. It produces longer chains, i.e. a
+   worse distribution of the strings in the table, yet the actual make
+   performance is better (less time). The explanation is probably that the
+   collisions only really increase for entries that aren't looked up that
+   much, while it actually improves the situation for those that are. Or
+   that we spend so much less time hashing that it makes up (and more) for
+   the penalty we suffer from the longer chains and worse distribution.
+
+ XXX: Check how this works out with different path lengths. I suspect it
+ might depend on the length of PATH_ROOT and the depth of the files
+ in the project as well. If it does, this might make matters worse
+ for some and better for others which isn't very cool... */
+
+#if 0
+# define BIG_HASH_SIZE 32 /* kinda fast */
+# define BIG_HASH_HEAD 16
+# define BIG_HASH_TAIL 12
+#elif 0
+# define BIG_HASH_SIZE 68 /* kinda safe */
+# define BIG_HASH_HEAD 24
+# define BIG_HASH_TAIL 24
+#elif 0
+# define BIG_HASH_SIZE 128 /* safe */
+# define BIG_HASH_HEAD 32
+# define BIG_HASH_TAIL 32
+#endif
+
+#ifdef BIG_HASH_SIZE
+/* long string: hash head and tail, drop the middle. */
+MY_INLINE unsigned int
+strcache2_case_sensitive_hash_big (const char *str, unsigned int len)
+{
+ uint32_t hash = len;
+ uint32_t tmp;
+ unsigned int head;
+
+ /* head BIG_HASH_HEAD bytes */
+ head = (BIG_HASH_HEAD >> 2);
+ while (head-- > 0)
+ {
+ hash += strcache2_get_unaligned_16bits (str);
+ tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ str += 2 * sizeof (uint16_t);
+ hash += hash >> 11;
+ }
+
+ /* tail BIG_HASH_TAIL bytes (minus the odd ones) */
+ str += (len - BIG_HASH_HEAD - BIG_HASH_TAIL) & ~3U;
+ head = (BIG_HASH_TAIL >> 2);
+ while (head-- > 0)
+ {
+ hash += strcache2_get_unaligned_16bits (str);
+ tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ str += 2 * sizeof (uint16_t);
+ hash += hash >> 11;
+ }
+
+ /* force "avalanching" of final 127 bits. */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
+#endif /* BIG_HASH_SIZE */
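+
+/* Worked example (only relevant when one of the BIG_HASH_SIZE configurations
+   above is enabled; the numbers assume the "kinda fast" setup with
+   BIG_HASH_HEAD=16 and BIG_HASH_TAIL=12): a 101 byte string takes the big
+   path since 101 >= 32. The head loop consumes bytes 0..15, the cursor then
+   skips (101 - 16 - 12) & ~3 = 72 bytes ahead to offset 88, and the tail
+   loop consumes bytes 88..99. Bytes 16..87 and the odd final byte 100 never
+   enter the hash. */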
+
+MY_INLINE unsigned int
+strcache2_case_sensitive_hash (const char *str, unsigned int len)
+{
+#if 1
+ /* Paul Hsieh hash SuperFast function:
+ http://www.azillionmonkeys.com/qed/hash.html
+
+     This performs very well and has a slightly better distribution than
+ STRING_N_HASH_1 on a typical kBuild run.
+
+ It is also 37% faster than return_STRING_N_HASH_1 when running the
+ two 100 times over typical kBuild strings that end up here (did a
+ fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
+ with -O2.
+
+ FIXME: A path for well aligned data should be added to speed up
+ execution on alignment sensitive systems. */
+ unsigned int rem;
+ uint32_t hash;
+ uint32_t tmp;
+
+ assert (sizeof (uint8_t) == sizeof (char));
+
+# ifdef BIG_HASH_SIZE
+ /* long string? */
+# if 0 /*BIG_HASH_SIZE > 128*/
+ if (MY_PREDICT_FALSE(len >= BIG_HASH_SIZE))
+# else
+ if (len >= BIG_HASH_SIZE)
+# endif
+ return strcache2_case_sensitive_hash_big (str, len);
+# endif
+
+ /* short string: main loop, walking on 2 x uint16_t */
+ hash = len;
+ rem = len & 3;
+ len >>= 2;
+ while (len > 0)
+ {
+ hash += strcache2_get_unaligned_16bits (str);
+ tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ str += 2 * sizeof (uint16_t);
+ hash += hash >> 11;
+ len--;
+ }
+
+ /* the remainder */
+ switch (rem)
+ {
+ case 3:
+ hash += strcache2_get_unaligned_16bits (str);
+ hash ^= hash << 16;
+ hash ^= str[sizeof (uint16_t)] << 18;
+ hash += hash >> 11;
+ break;
+ case 2:
+ hash += strcache2_get_unaligned_16bits (str);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1:
+ hash += *str;
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ break;
+ }
+
+ /* force "avalanching" of final 127 bits. */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+
+#elif 1
+ /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
+ when running the two 100 times over typical kBuild strings that
+ end up here (did a fprintf here and built kBuild).
+ Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
+
+ unsigned int hash = 0;
+ if (MY_PREDICT_TRUE(len >= 2))
+ {
+ unsigned int ch0 = *str++;
+ hash = 0;
+ len--;
+ while (len >= 2)
+ {
+ unsigned int ch1 = *str++;
+ hash += ch0 << (ch1 & 0xf);
+
+ ch0 = *str++;
+ hash += ch1 << (ch0 & 0xf);
+
+ len -= 2;
+ }
+ if (len == 1)
+ {
+ unsigned ch1 = *str;
+ hash += ch0 << (ch1 & 0xf);
+
+ hash += ch1;
+ }
+ else
+ hash += ch0;
+ }
+ else if (len)
+ {
+ hash = *str;
+ hash += hash << (hash & 0xf);
+ }
+ else
+ hash = 0;
+ return hash;
+
+#elif 1
+# if 0
+ /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
+ than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
+     first in the assignment made it noticeably slower.)
+
+     However, it is noticeably slower in practice, most likely because of more
+ collisions. Hrmpf. */
+
+# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
+ unsigned int hash = 0;
+
+# else
+ /* This is DJB2. This specific form/unroll was benchmarked to be 27%
+     faster than return_STRING_N_HASH_1.
+
+ Ditto. */
+
+# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
+ unsigned int hash = 5381;
+# endif
+
+
+ while (len >= 4)
+ {
+ UPDATE_HASH (str[0]);
+ UPDATE_HASH (str[1]);
+ UPDATE_HASH (str[2]);
+ UPDATE_HASH (str[3]);
+ str += 4;
+ len -= 4;
+ }
+ switch (len)
+ {
+ default:
+ case 0:
+ return hash;
+ case 1:
+ UPDATE_HASH (str[0]);
+ return hash;
+ case 2:
+ UPDATE_HASH (str[0]);
+ UPDATE_HASH (str[1]);
+ return hash;
+ case 3:
+ UPDATE_HASH (str[0]);
+ UPDATE_HASH (str[1]);
+ UPDATE_HASH (str[2]);
+ return hash;
+ }
+#endif
+}
+
+MY_INLINE unsigned int
+strcache2_case_insensitive_hash (const char *str, unsigned int len)
+{
+ unsigned int hash = 0;
+ if (MY_PREDICT_TRUE(len >= 2))
+ {
+ unsigned int ch0 = *str++;
+ ch0 = tolower (ch0);
+ hash = 0;
+ len--;
+ while (len >= 2)
+ {
+ unsigned int ch1 = *str++;
+ ch1 = tolower (ch1);
+ hash += ch0 << (ch1 & 0xf);
+
+ ch0 = *str++;
+ ch0 = tolower (ch0);
+ hash += ch1 << (ch0 & 0xf);
+
+ len -= 2;
+ }
+ if (len == 1)
+ {
+ unsigned ch1 = *str;
+ ch1 = tolower (ch1);
+ hash += ch0 << (ch1 & 0xf);
+
+ hash += ch1;
+ }
+ else
+ hash += ch0;
+ }
+ else if (len)
+ {
+ hash = *str;
+ hash += hash << (hash & 0xf);
+ }
+ else
+ hash = 0;
+ return hash;
+}
+
+#if 0
+MY_INLINE int
+strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
+{
+ if (length <= 8)
+ {
+ /* short string compare - ~50% of the kBuild calls. */
+ assert ( !((size_t)ys & 3) );
+ if (!((size_t)xs & 3))
+ {
+ /* aligned */
+ int result;
+ switch (length)
+ {
+ default: /* memcmp for longer strings */
+ return memcmp (xs, ys, length);
+ case 8:
+ result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
+ result |= *(int32_t*)xs - *(int32_t*)ys;
+ return result;
+ case 7:
+ result = xs[6] - ys[6];
+ result |= xs[5] - ys[5];
+ result |= xs[4] - ys[4];
+ result |= *(int32_t*)xs - *(int32_t*)ys;
+ return result;
+ case 6:
+ result = xs[5] - ys[5];
+ result |= xs[4] - ys[4];
+ result |= *(int32_t*)xs - *(int32_t*)ys;
+ return result;
+ case 5:
+ result = xs[4] - ys[4];
+ result |= *(int32_t*)xs - *(int32_t*)ys;
+ return result;
+ case 4:
+ return *(int32_t*)xs - *(int32_t*)ys;
+ case 3:
+ result = xs[2] - ys[2];
+ result |= xs[1] - ys[1];
+ result |= xs[0] - ys[0];
+ return result;
+ case 2:
+ result = xs[1] - ys[1];
+ result |= xs[0] - ys[0];
+ return result;
+ case 1:
+ return *xs - *ys;
+ case 0:
+ return 0;
+ }
+ }
+ else
+ {
+ /* unaligned */
+ int result = 0;
+ switch (length)
+ {
+ case 8: result |= xs[7] - ys[7];
+ case 7: result |= xs[6] - ys[6];
+ case 6: result |= xs[5] - ys[5];
+ case 5: result |= xs[4] - ys[4];
+ case 4: result |= xs[3] - ys[3];
+ case 3: result |= xs[2] - ys[2];
+ case 2: result |= xs[1] - ys[1];
+ case 1: result |= xs[0] - ys[0];
+ case 0:
+ return result;
+ }
+ }
+ }
+
+ /* memcmp for longer strings */
+ return memcmp (xs, ys, length);
+}
+#endif
+
+MY_INLINE int
+strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
+{
+#ifndef ELECTRIC_HEAP
+ assert ( !((size_t)ys & 3) );
+#endif
+ if (!((size_t)xs & 3))
+ {
+ /* aligned */
+ int result;
+ unsigned reminder = length & 7;
+ length >>= 3;
+ while (length-- > 0)
+ {
+ result = *(int32_t*)xs - *(int32_t*)ys;
+ result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
+ if (MY_PREDICT_FALSE(result))
+ return result;
+ xs += 8;
+ ys += 8;
+ }
+ switch (reminder)
+ {
+ case 7:
+ result = *(int32_t*)xs - *(int32_t*)ys;
+ result |= xs[6] - ys[6];
+ result |= xs[5] - ys[5];
+ result |= xs[4] - ys[4];
+ return result;
+ case 6:
+ result = *(int32_t*)xs - *(int32_t*)ys;
+ result |= xs[5] - ys[5];
+ result |= xs[4] - ys[4];
+ return result;
+ case 5:
+ result = *(int32_t*)xs - *(int32_t*)ys;
+ result |= xs[4] - ys[4];
+ return result;
+ case 4:
+ return *(int32_t*)xs - *(int32_t*)ys;
+ case 3:
+ result = xs[2] - ys[2];
+ result |= xs[1] - ys[1];
+ result |= xs[0] - ys[0];
+ return result;
+ case 2:
+ result = xs[1] - ys[1];
+ result |= xs[0] - ys[0];
+ return result;
+ case 1:
+ return *xs - *ys;
+ default:
+ case 0:
+ return 0;
+ }
+ }
+ else
+ {
+ /* unaligned */
+ int result;
+ unsigned reminder = length & 7;
+ length >>= 3;
+ while (length-- > 0)
+ {
+#if defined(__i386__) || defined(__x86_64__)
+ result = ( ((int32_t)xs[3] << 24)
+ | ((int32_t)xs[2] << 16)
+ | ((int32_t)xs[1] << 8)
+ | xs[0] )
+ - *(int32_t*)ys;
+ result |= ( ((int32_t)xs[7] << 24)
+ | ((int32_t)xs[6] << 16)
+ | ((int32_t)xs[5] << 8)
+ | xs[4] )
+ - *(int32_t*)(ys + 4);
+#else
+ result = xs[3] - ys[3];
+ result |= xs[2] - ys[2];
+ result |= xs[1] - ys[1];
+ result |= xs[0] - ys[0];
+ result |= xs[7] - ys[7];
+ result |= xs[6] - ys[6];
+ result |= xs[5] - ys[5];
+ result |= xs[4] - ys[4];
+#endif
+ if (MY_PREDICT_FALSE(result))
+ return result;
+ xs += 8;
+ ys += 8;
+ }
+
+ result = 0;
+ switch (reminder)
+ {
+ case 7: result |= xs[6] - ys[6]; /* fall thru */
+ case 6: result |= xs[5] - ys[5]; /* fall thru */
+ case 5: result |= xs[4] - ys[4]; /* fall thru */
+ case 4: result |= xs[3] - ys[3]; /* fall thru */
+ case 3: result |= xs[2] - ys[2]; /* fall thru */
+ case 2: result |= xs[1] - ys[1]; /* fall thru */
+ case 1: result |= xs[0] - ys[0]; /* fall thru */
+ return result;
+ default:
+ case 0:
+ return 0;
+ }
+ }
+}
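+
+/* Note that only the zero / non-zero outcome of the above is meaningful: the
+   differences are OR'ed together, so unlike memcmp the sign of the result
+   does not give an ordering. strcache2_is_equal below only ever compares the
+   result against zero. */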
+
+MY_INLINE int
+strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
+ const char *str, unsigned int length, unsigned int hash)
+{
+ assert (!cache->case_insensitive);
+
+ /* the simple stuff first. */
+ if ( entry->hash != hash
+ || entry->length != length)
+ return 0;
+
+#if 0
+ return memcmp (str, entry + 1, length) == 0;
+#elif 1
+ return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
+#else
+ return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
+#endif
+}
+
+#if defined(HAVE_CASE_INSENSITIVE_FS)
+MY_INLINE int
+strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
+ const char *str, unsigned int length, unsigned int hash)
+{
+ assert (cache->case_insensitive);
+
+ /* the simple stuff first. */
+ if ( entry->hash != hash
+ || entry->length != length)
+ return 0;
+
+# if defined(_MSC_VER) || defined(__OS2__)
+ return _memicmp (entry + 1, str, length) == 0;
+# else
+ return strncasecmp ((const char *)(entry + 1), str, length) == 0;
+# endif
+}
+#endif /* HAVE_CASE_INSENSITIVE_FS */
+
+static void
+strcache2_rehash (struct strcache2 *cache)
+{
+ unsigned int src = cache->hash_size;
+ struct strcache2_entry **src_tab = cache->hash_tab;
+ struct strcache2_entry **dst_tab;
+#ifndef STRCACHE2_USE_MASK
+ unsigned int hash_shift;
+#endif
+
+ /* Allocate a new hash table twice the size of the current. */
+ cache->hash_size <<= 1;
+#ifdef STRCACHE2_USE_MASK
+ cache->hash_mask <<= 1;
+ cache->hash_mask |= 1;
+#else
+ for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
+ /* nothing */;
+ cache->hash_div = strcache2_find_prime (hash_shift);
+#endif
+ cache->rehash_count <<= 1;
+ cache->hash_tab = dst_tab = (struct strcache2_entry **)
+ xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
+ memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
+
+ /* Copy the entries from the old to the new hash table. */
+ cache->collision_count = 0;
+ while (src-- > 0)
+ {
+ struct strcache2_entry *entry = src_tab[src];
+ while (entry)
+ {
+ struct strcache2_entry *next = entry->next;
+ unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
+ if ((entry->next = dst_tab[dst]) != 0)
+ cache->collision_count++;
+ dst_tab[dst] = entry;
+
+ entry = next;
+ }
+ }
+
+ /* That's it, just free the old table and we're done. */
+ free (src_tab);
+}
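+
+/* Growth example (follows from the code above): a cache that started with
+   65536 slots rehashes to 131072 slots once it holds 49152 entries (75%),
+   and the next rehash is then deferred until 98304 entries. */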
+
+static struct strcache2_seg *
+strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
+{
+ struct strcache2_seg *seg;
+ size_t size;
+ size_t off;
+
+ size = cache->def_seg_size;
+ if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
+ {
+ size = (size_t)minlen * 2;
+ size = (size + 0xfff) & ~(size_t)0xfff;
+ }
+
+ seg = xmalloc (size);
+ seg->start = (char *)(seg + 1);
+ seg->size = size - sizeof (struct strcache2_seg);
+ off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
+ if (off)
+ {
+ off = STRCACHE2_ENTRY_ALIGNMENT - off;
+ seg->start += off;
+ seg->size -= off;
+ }
+ assert (seg->size > minlen);
+ seg->cursor = seg->start;
+ seg->avail = seg->size;
+
+ seg->next = cache->seg_head;
+ cache->seg_head = seg;
+
+ return seg;
+}
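+
+/* Rough layout of a segment as allocated above (sketch, not to scale):
+
+     +----------------------+-----+-----------------------------------------+
+     | struct strcache2_seg | pad | string entries, carved off at ->cursor  |
+     +----------------------+-----+-----------------------------------------+
+     ^ xmalloc'ed block           ^ ->start, aligned to STRCACHE2_ENTRY_ALIGNMENT
+
+   ->size and ->avail only count the bytes following the header and the
+   alignment padding. */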
+
+/* Internal worker that enters a new string into the cache. */
+static const char *
+strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
+ const char *str, unsigned int length,
+ unsigned int hash)
+{
+ struct strcache2_entry *entry;
+ struct strcache2_seg *seg;
+ unsigned int size;
+ char *str_copy;
+
+ /* Allocate space for the string. */
+
+ size = length + 1 + sizeof (struct strcache2_entry);
+ size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
+
+ seg = cache->seg_head;
+ if (MY_PREDICT_FALSE(seg->avail < size))
+ {
+ do
+ seg = seg->next;
+ while (seg && seg->avail < size);
+ if (!seg)
+ seg = strcache2_new_seg (cache, size);
+ }
+
+ entry = (struct strcache2_entry *) seg->cursor;
+ assert (!((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1)));
+ seg->cursor += size;
+ seg->avail -= size;
+
+ /* Setup the entry, copy the string and insert it into the hash table. */
+
+ entry->user = NULL;
+ entry->length = length;
+ entry->hash = hash;
+ str_copy = (char *) memcpy (entry + 1, str, length);
+ str_copy[length] = '\0';
+
+ if ((entry->next = cache->hash_tab[idx]) != 0)
+ cache->collision_count++;
+ cache->hash_tab[idx] = entry;
+ cache->count++;
+ if (cache->count >= cache->rehash_count)
+ strcache2_rehash (cache);
+
+ return str_copy;
+}
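+
+/* Each entry allocated above has the following layout (sketch):
+
+     +------------------------+--------------+------+----------------------+
+     | struct strcache2_entry | string bytes | '\0' | pad up to alignment  |
+     +------------------------+--------------+------+----------------------+
+                              ^ the pointer handed back to callers
+
+   which is why the lookup and add functions return (const char *)(entry + 1)
+   and why strcache2_verify_entry can get back to the header by subtracting
+   one strcache2_entry from the string pointer. */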
+
+/* The public add string interface. */
+const char *
+strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
+{
+ struct strcache2_entry const *entry;
+ unsigned int hash = strcache2_case_sensitive_hash (str, length);
+ unsigned int idx;
+
+ assert (!cache->case_insensitive);
+ assert (!memchr (str, '\0', length));
+
+ MAKE_STATS (cache->lookup_count++);
+
+ /* Lookup the entry in the hash table, hoping for an
+ early match. If not found, enter the string at IDX. */
+ idx = STRCACHE2_MOD_IT (cache, hash);
+ entry = cache->hash_tab[idx];
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_equal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_1st_count++);
+
+ entry = entry->next;
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_equal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_2nd_count++);
+
+ /* Loop the rest. */
+ for (;;)
+ {
+ entry = entry->next;
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_equal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_3rd_count++);
+ }
+ /* not reached */
+}
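+
+/* A minimal usage sketch (hypothetical caller; the names are illustrative and
+   not taken from the kBuild sources). The last two strcache2_init arguments
+   are case_insensitive and thread_safe; the struct should be zero initialized
+   (e.g. a static) before the call, since strcache2_new_seg reads
+   cache->seg_head when linking the first segment:
+
+      static struct strcache2 file_cache;
+      strcache2_init (&file_cache, "files", 0, 0, 0, 0);
+
+      const char *s1 = strcache2_add (&file_cache, "src/foo.c", 9);
+      const char *s2 = strcache2_add (&file_cache, "src/foo.c", 9);
+      assert (s1 == s2);        (identical strings share one cached copy)
+      assert (!strcache2_lookup (&file_cache, "src/bar.c", 9));
+
+      strcache2_term (&file_cache);
+*/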
+
+/* The public add string interface for prehashed strings.
+ Use strcache2_hash_str to calculate the hash of a string. */
+const char *
+strcache2_add_hashed (struct strcache2 *cache, const char *str,
+ unsigned int length, unsigned int hash)
+{
+ struct strcache2_entry const *entry;
+ unsigned int idx;
+#ifndef NDEBUG
+ unsigned correct_hash;
+
+ assert (!cache->case_insensitive);
+ assert (!memchr (str, '\0', length));
+ correct_hash = strcache2_case_sensitive_hash (str, length);
+ MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
+#endif /* NDEBUG */
+
+ MAKE_STATS (cache->lookup_count++);
+
+ /* Lookup the entry in the hash table, hoping for an
+ early match. If not found, enter the string at IDX. */
+ idx = STRCACHE2_MOD_IT (cache, hash);
+ entry = cache->hash_tab[idx];
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_equal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_1st_count++);
+
+ entry = entry->next;
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_equal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_2nd_count++);
+
+ /* Loop the rest. */
+ for (;;)
+ {
+ entry = entry->next;
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_equal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_3rd_count++);
+ }
+ /* not reached */
+}
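+
+/* Prehashed usage sketch (hypothetical, continuing the example above).
+   strcache2_hash_str also produces a secondary hash, which is always 1 in
+   this implementation and is purely for the caller's own tables:
+
+      unsigned int hash2;
+      unsigned int hash = strcache2_hash_str ("src/foo.c", 9, &hash2);
+      const char *s = strcache2_add_hashed (&file_cache, "src/foo.c", 9, hash);
+*/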
+
+/* The public lookup (case sensitive) string interface. */
+const char *
+strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
+{
+ struct strcache2_entry const *entry;
+ unsigned int hash = strcache2_case_sensitive_hash (str, length);
+ unsigned int idx;
+
+ assert (!cache->case_insensitive);
+ assert (!memchr (str, '\0', length));
+
+ MAKE_STATS (cache->lookup_count++);
+
+ /* Lookup the entry in the hash table, hoping for an
+ early match. */
+ idx = STRCACHE2_MOD_IT (cache, hash);
+ entry = cache->hash_tab[idx];
+ if (!entry)
+ return NULL;
+ if (strcache2_is_equal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_1st_count++);
+
+ entry = entry->next;
+ if (!entry)
+ return NULL;
+ if (strcache2_is_equal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_2nd_count++);
+
+ /* Loop the rest. */
+ for (;;)
+ {
+ entry = entry->next;
+ if (!entry)
+ return NULL;
+ if (strcache2_is_equal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_3rd_count++);
+ }
+ /* not reached */
+}
+
+#if defined(HAVE_CASE_INSENSITIVE_FS)
+
+/* The public add string interface for case insensitive strings. */
+const char *
+strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
+{
+ struct strcache2_entry const *entry;
+ unsigned int hash = strcache2_case_insensitive_hash (str, length);
+ unsigned int idx;
+
+ assert (cache->case_insensitive);
+ assert (!memchr (str, '\0', length));
+
+ MAKE_STATS (cache->lookup_count++);
+
+ /* Lookup the entry in the hash table, hoping for an
+ early match. If not found, enter the string at IDX. */
+ idx = STRCACHE2_MOD_IT (cache, hash);
+ entry = cache->hash_tab[idx];
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_iequal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_1st_count++);
+
+ entry = entry->next;
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_iequal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_2nd_count++);
+
+ /* Loop the rest. */
+ for (;;)
+ {
+ entry = entry->next;
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_iequal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_3rd_count++);
+ }
+ /* not reached */
+}
+
+/* The public add string interface for prehashed case insensitive strings.
+ Use strcache2_hash_istr to calculate the hash of a string. */
+const char *
+strcache2_iadd_hashed (struct strcache2 *cache, const char *str,
+ unsigned int length, unsigned int hash)
+{
+ struct strcache2_entry const *entry;
+ unsigned int idx;
+#ifndef NDEBUG
+ unsigned correct_hash;
+
+ assert (cache->case_insensitive);
+ assert (!memchr (str, '\0', length));
+ correct_hash = strcache2_case_insensitive_hash (str, length);
+ MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
+#endif /* NDEBUG */
+
+ MAKE_STATS (cache->lookup_count++);
+
+ /* Lookup the entry in the hash table, hoping for an
+ early match. If not found, enter the string at IDX. */
+ idx = STRCACHE2_MOD_IT (cache, hash);
+ entry = cache->hash_tab[idx];
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_iequal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_1st_count++);
+
+ entry = entry->next;
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_iequal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_2nd_count++);
+
+ /* Loop the rest. */
+ for (;;)
+ {
+ entry = entry->next;
+ if (!entry)
+ return strcache2_enter_string (cache, idx, str, length, hash);
+ if (strcache2_is_iequal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_3rd_count++);
+ }
+ /* not reached */
+}
+
+/* The public lookup (case insensitive) string interface. */
+const char *
+strcache2_ilookup (struct strcache2 *cache, const char *str, unsigned int length)
+{
+ struct strcache2_entry const *entry;
+ unsigned int hash = strcache2_case_insensitive_hash (str, length);
+ unsigned int idx;
+
+ assert (cache->case_insensitive);
+ assert (!memchr (str, '\0', length));
+
+ MAKE_STATS (cache->lookup_count++);
+
+ /* Lookup the entry in the hash table, hoping for an
+ early match. */
+ idx = STRCACHE2_MOD_IT (cache, hash);
+ entry = cache->hash_tab[idx];
+ if (!entry)
+ return NULL;
+ if (strcache2_is_iequal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_1st_count++);
+
+ entry = entry->next;
+ if (!entry)
+ return NULL;
+ if (strcache2_is_iequal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_2nd_count++);
+
+ /* Loop the rest. */
+ for (;;)
+ {
+ entry = entry->next;
+ if (!entry)
+ return NULL;
+ if (strcache2_is_iequal (cache, entry, str, length, hash))
+ return (const char *)(entry + 1);
+ MAKE_STATS (cache->collision_3rd_count++);
+ }
+ /* not reached */
+}
+
+#endif /* HAVE_CASE_INSENSITIVE_FS */
+
+/* Is the given string cached? Returns 1 if it is, 0 if it isn't. */
+int
+strcache2_is_cached (struct strcache2 *cache, const char *str)
+{
+ /* Check mandatory alignment first. */
+ if (!((size_t)str & (sizeof (void *) - 1)))
+ {
+ /* Check the segment list and consider the question answered if the
+ string is within one of them. (Could check it more thoroughly...) */
+ struct strcache2_seg const *seg;
+ for (seg = cache->seg_head; seg; seg = seg->next)
+ if ((size_t)(str - seg->start) < seg->size)
+ return 1;
+ }
+
+ return 0;
+}
+
+
+/* Verify the integrity of the specified string, returning 0 if OK. */
+int
+strcache2_verify_entry (struct strcache2 *cache, const char *str)
+{
+ struct strcache2_entry const *entry;
+ unsigned int hash;
+ unsigned int length;
+ const char *end;
+
+ entry = (struct strcache2_entry const *)str - 1;
+ if ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1))
+ {
+ fprintf (stderr,
+               "strcache2[%s]: misaligned entry %p\nstring: %p=%s\n",
+ cache->name, (void *)entry, (void *)str, str);
+ return -1;
+ }
+
+ end = memchr (str, '\0', entry->length + 1);
+ length = end - str;
+ if (length != entry->length)
+ {
+ fprintf (stderr,
+ "strcache2[%s]: corrupt entry %p, length: %u, expected %u;\nstring: %s\n",
+ cache->name, (void *)entry, length, entry->length, str);
+ return -1;
+ }
+
+ hash = cache->case_insensitive
+ ? strcache2_case_insensitive_hash (str, entry->length)
+ : strcache2_case_sensitive_hash (str, entry->length);
+ if (hash != entry->hash)
+ {
+ fprintf (stderr,
+ "strcache2[%s]: corrupt entry %p, hash: %x, expected %x;\nstring: %s\n",
+ cache->name, (void *)entry, hash, entry->hash, str);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+/* Calculates the case sensitive hash values of the string.
+ The first hash is returned, the other is put at HASH2P. */
+unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
+{
+ *hash2p = 1;
+ return strcache2_case_sensitive_hash (str, length);
+}
+
+/* Calculates the case insensitive hash values of the string.
+ The first hash is returned, the other is put at HASH2P. */
+unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
+{
+ *hash2p = 1;
+ return strcache2_case_insensitive_hash (str, length);
+}
+
+
+
+/* Initializes a new cache. */
+void
+strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
+ unsigned int def_seg_size, int case_insensitive, int thread_safe)
+{
+ unsigned hash_shift;
+ assert (!thread_safe);
+
+ /* calc the size as a power of two */
+ if (!size)
+ hash_shift = STRCACHE2_HASH_SHIFT;
+ else
+ {
+ assert (size <= (~0U / 2 + 1));
+ for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
+ /* nothing */;
+ }
+
+ /* adjust the default segment size */
+ if (!def_seg_size)
+ def_seg_size = STRCACHE2_SEG_SIZE;
+ else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
+ def_seg_size = sizeof (struct strcache2_seg) * 10;
+ else if ((def_seg_size & 0xfff) < 0xf00)
+ def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
+
+
+ /* init the structure. */
+ cache->case_insensitive = case_insensitive;
+#ifdef STRCACHE2_USE_MASK
+ cache->hash_mask = (1U << hash_shift) - 1U;
+#else
+ cache->hash_div = strcache2_find_prime(hash_shift);
+#endif
+ cache->count = 0;
+ cache->collision_count = 0;
+ cache->lookup_count = 0;
+ cache->collision_1st_count = 0;
+ cache->collision_2nd_count = 0;
+ cache->collision_3rd_count = 0;
+ cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
+ cache->init_size = 1U << hash_shift;
+ cache->hash_size = 1U << hash_shift;
+ cache->def_seg_size = def_seg_size;
+ cache->lock = NULL;
+ cache->name = name;
+
+ /* allocate the hash table and first segment. */
+ cache->hash_tab = (struct strcache2_entry **)
+ xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
+ memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
+ strcache2_new_seg (cache, 0);
+
+ /* link it */
+ cache->next = strcache_head;
+ strcache_head = cache;
+}
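+
+/* Sizing example (the numbers follow directly from the code above): a call
+   like strcache2_init (&cache, "variables", 1000, 0, 0, 0) picks
+   hash_shift = 10, i.e. a 1024 slot table (hash_div = 1021 in the prime
+   variant, hash_mask = 0x3ff with STRCACHE2_USE_MASK), sets the rehash
+   threshold to 768 entries and keeps the default 1 MB segment size. */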
+
+
+/* Terminates a string cache, freeing all memory and other
+ associated resources. */
+void
+strcache2_term (struct strcache2 *cache)
+{
+ /* unlink it */
+ if (strcache_head == cache)
+ strcache_head = cache->next;
+ else
+ {
+ struct strcache2 *prev = strcache_head;
+ while (prev->next != cache)
+ prev = prev->next;
+ assert (prev);
+ prev->next = cache->next;
+ }
+
+ /* free the memory segments */
+ do
+ {
+ void *free_it = cache->seg_head;
+ cache->seg_head = cache->seg_head->next;
+ free (free_it);
+ }
+ while (cache->seg_head);
+
+ /* free the hash and clear the structure. */
+ free (cache->hash_tab);
+ memset (cache, '\0', sizeof (struct strcache2));
+}
+
+/* Print statistics for a string cache. */
+void
+strcache2_print_stats (struct strcache2 *cache, const char *prefix)
+{
+ unsigned int seg_count = 0;
+ unsigned long seg_total_bytes = 0;
+ unsigned long seg_avg_bytes;
+ unsigned long seg_avail_bytes = 0;
+ unsigned long seg_max_bytes = 0;
+ struct strcache2_seg *seg;
+ unsigned int str_count = 0;
+ unsigned long str_total_len = 0;
+ unsigned int str_avg_len;
+ unsigned int str_min_len = ~0U;
+ unsigned int str_max_len = 0;
+ unsigned int idx;
+ unsigned int rehashes;
+ unsigned int chain_depths[32];
+
+ printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
+
+ /* Segment statistics. */
+ for (seg = cache->seg_head; seg; seg = seg->next)
+ {
+ seg_count++;
+ seg_total_bytes += seg->size;
+ seg_avail_bytes += seg->avail;
+ if (seg->size > seg_max_bytes)
+ seg_max_bytes = seg->size;
+ }
+ seg_avg_bytes = seg_total_bytes / seg_count;
+ printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
+ prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
+ cache->def_seg_size, seg_avail_bytes);
+
+ /* String statistics. */
+ memset (chain_depths, '\0', sizeof (chain_depths));
+ idx = cache->hash_size;
+ while (idx-- > 0)
+ {
+ struct strcache2_entry const *entry = cache->hash_tab[idx];
+ unsigned int depth = 0;
+ for (; entry != 0; entry = entry->next, depth++)
+ {
+ unsigned int length = entry->length;
+ str_total_len += length;
+ if (length > str_max_len)
+ str_max_len = length;
+ if (length < str_min_len)
+ str_min_len = length;
+ str_count++;
+ }
+ chain_depths[depth >= 32 ? 31 : depth]++;
+ }
+ str_avg_len = cache->count ? str_total_len / cache->count : 0;
+ printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
+ prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
+ if (str_count != cache->count)
+ printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
+ cache->count, str_count);
+
+ /* Hash statistics. */
+ idx = cache->init_size;
+ rehashes = 0;
+ while (idx < cache->hash_size)
+ {
+ idx *= 2;
+ rehashes++;
+ }
+
+#ifdef STRCACHE2_USE_MASK
+ printf (_("%s hash size = %u mask = %#x rehashed %u times"),
+ prefix, cache->hash_size, cache->hash_mask, rehashes);
+#else
+ printf (_("%s hash size = %u div = %#x rehashed %u times"),
+ prefix, cache->hash_size, cache->hash_div, rehashes);
+#endif
+ if (cache->lookup_count)
+ printf (_("%s lookups = %lu\n"
+ "%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)"),
+ prefix, cache->lookup_count,
+ prefix,
+ cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
+ cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
+ cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
+ printf (_("\n%s hash insert collisions = %u (%u%%)\n"),
+ prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
+ printf (_("%s %5u (%u%%) empty hash table slots\n"),
+ prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
+ printf (_("%s %5u (%u%%) occupied hash table slots\n"),
+ prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
+ for (idx = 2; idx < 32; idx++)
+ {
+ unsigned strs_at_this_depth = chain_depths[idx];
+ unsigned i;
+ for (i = idx + 1; i < 32; i++)
+ strs_at_this_depth += chain_depths[i];
+ if (strs_at_this_depth)
+ printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
+ prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
+ idx - 1, idx == 2 ? " " : "s",
+ strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
+ }
+}
+
+/* Print statistics for all string caches. */
+void
+strcache2_print_stats_all (const char *prefix)
+{
+ struct strcache2 *cur;
+ for (cur = strcache_head; cur; cur = cur->next)
+ strcache2_print_stats (cur, prefix);
+}
+