summaryrefslogtreecommitdiffstats
path: root/hashlib.c
diff options
context:
space:
mode:
Diffstat (limited to 'hashlib.c')
-rw-r--r--hashlib.c545
1 files changed, 545 insertions, 0 deletions
diff --git a/hashlib.c b/hashlib.c
new file mode 100644
index 0000000..4a7e813
--- /dev/null
+++ b/hashlib.c
@@ -0,0 +1,545 @@
+/* hashlib.c -- functions to manage and access hash tables for bash. */
+
+/* Copyright (C) 1987,1989,1991,1995,1998,2001,2003,2005,2006,2008,2009 Free Software Foundation, Inc.
+
+ This file is part of GNU Bash, the Bourne Again SHell.
+
+ Bash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Bash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Bash. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <config.h>
+
+#include "bashansi.h"
+
+#if defined (HAVE_UNISTD_H)
+# ifdef _MINIX
+# include <sys/types.h>
+# endif
+# include <unistd.h>
+#endif
+
+#include <limits.h>
+#include <stdio.h>
+
+#include "shell.h"
+#include "hashlib.h"
+
+/* Tunable constants for rehashing.  HASH_REHASH_MULTIPLIER is the factor
+   by which hash_grow multiplies, and hash_shrink divides, the bucket
+   count.  HASH_REHASH_FACTOR is the number of entries per bucket at which
+   HASH_SHOULDGROW becomes true. */
+#define HASH_REHASH_MULTIPLIER 4
+#define HASH_REHASH_FACTOR 2
+
+/* True when TABLE holds at least HASH_REHASH_FACTOR entries per bucket. */
+#define HASH_SHOULDGROW(table) \
+ ((table)->nentries >= (table)->nbuckets * HASH_REHASH_FACTOR)
+
+/* An initial approximation: true when TABLE has more than
+   DEFAULT_HASH_BUCKETS buckets and fewer entries than its bucket count
+   divided by HASH_REHASH_MULTIPLIER. */
+#define HASH_SHOULDSHRINK(table) \
+ (((table)->nbuckets > DEFAULT_HASH_BUCKETS) && \
+ ((table)->nentries < (table)->nbuckets / HASH_REHASH_MULTIPLIER))
+
+/* Hash the string S with hash_string, store the complete hash value in the
+   lvalue H as a side effect, and yield the bucket index for table T by
+   masking that value with (nbuckets - 1).  The hash value is unsigned, so
+   the index is never negative.  Because this is a mask rather than a
+   modulus, every bucket is reachable only when nbuckets is a power of
+   two. */
+#define HASH_BUCKET(s, t, h) (((h) = hash_string (s)) & ((t)->nbuckets - 1))
+
+/* Forward declarations of the file-local helpers defined later in this
+   file: chain duplication (used by hash_copy) and table resizing. */
+static BUCKET_CONTENTS *copy_bucket_array PARAMS((BUCKET_CONTENTS *, sh_string_func_t *));
+
+static void hash_rehash PARAMS((HASH_TABLE *, int));
+static void hash_grow PARAMS((HASH_TABLE *));
+static void hash_shrink PARAMS((HASH_TABLE *));
+
+/* Allocate and return a new, empty hash table containing BUCKETS bucket
+   slots, every one of which starts out as an empty (NULL) chain.  A
+   BUCKETS value of 0 selects DEFAULT_HASH_BUCKETS. */
+HASH_TABLE *
+hash_create (buckets)
+     int buckets;
+{
+  HASH_TABLE *ht;
+  BUCKET_CONTENTS **slots;
+  int slot;
+
+  ht = (HASH_TABLE *)xmalloc (sizeof (HASH_TABLE));
+
+  if (buckets == 0)
+    buckets = DEFAULT_HASH_BUCKETS;
+
+  slots = (BUCKET_CONTENTS **)xmalloc (buckets * sizeof (BUCKET_CONTENTS *));
+
+  /* Assign null pointers one by one instead of zero-filling the memory. */
+  slot = 0;
+  while (slot < buckets)
+    slots[slot++] = (BUCKET_CONTENTS *)NULL;
+
+  ht->bucket_array = slots;
+  ht->nbuckets = buckets;
+  ht->nentries = 0;
+
+  return ht;
+}
+
+/* Return the number of entries stored in TABLE, as computed by the
+   HASH_ENTRIES macro. */
+int
+hash_size (table)
+     HASH_TABLE *table;
+{
+  int count;
+
+  count = HASH_ENTRIES (table);
+  return count;
+}
+
+/* Return a newly-allocated duplicate of the bucket chain headed by BA,
+   preserving the order of the entries.  Each key is duplicated with
+   savestring.  Non-null data is duplicated by calling CPDATA when one is
+   supplied and with savestring otherwise; null data stays null.  The
+   khash and times_found fields are carried over unchanged.  An empty
+   chain yields a null pointer. */
+static BUCKET_CONTENTS *
+copy_bucket_array (ba, cpdata)
+     BUCKET_CONTENTS *ba;
+     sh_string_func_t *cpdata;  /* data copy function */
+{
+  BUCKET_CONTENTS *head, *src, *node;
+  BUCKET_CONTENTS **tailp;      /* link that receives the next copied node */
+
+  head = (BUCKET_CONTENTS *)0;
+  tailp = &head;
+
+  for (src = ba; src; src = src->next)
+    {
+      node = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
+
+      node->key = savestring (src->key);
+      if (src->data == 0)
+        node->data = NULL;
+      else if (cpdata)
+        node->data = (*cpdata) (src->data);
+      else
+        node->data = savestring (src->data);
+      node->khash = src->khash;
+      node->times_found = src->times_found;
+      node->next = (BUCKET_CONTENTS *)NULL;
+
+      /* Append at the tail so the copy keeps the source chain's order. */
+      *tailp = node;
+      tailp = &node->next;
+    }
+
+  return head;
+}
+
+/* Resize TABLE to NSIZE buckets, moving every existing entry onto the
+   chain selected by its cached khash value masked with (NSIZE - 1).
+   Entries are relinked in place, never copied or freed; only the old
+   bucket array is released.  Does nothing when TABLE is null or already
+   has NSIZE buckets. */
+static void
+hash_rehash (table, nsize)
+     HASH_TABLE *table;
+     int nsize;
+{
+  BUCKET_CONTENTS **old_slots, **new_slots;
+  BUCKET_CONTENTS *entry, *rest;
+  int old_count, slot, dest;
+
+  if (table == NULL)
+    return;
+  if (table->nbuckets == nsize)
+    return;
+
+  old_count = table->nbuckets;
+  old_slots = table->bucket_array;
+
+  table->nbuckets = nsize;
+  new_slots = (BUCKET_CONTENTS **)xmalloc (nsize * sizeof (BUCKET_CONTENTS *));
+  table->bucket_array = new_slots;
+
+  for (slot = 0; slot < nsize; slot++)
+    new_slots[slot] = (BUCKET_CONTENTS *)NULL;
+
+  for (slot = 0; slot < old_count; slot++)
+    {
+      entry = old_slots[slot];
+      while (entry)
+        {
+          /* Remember the rest of the old chain before relinking ENTRY at
+             the head of its new chain. */
+          rest = entry->next;
+          dest = entry->khash & (nsize - 1);
+          entry->next = new_slots[dest];
+          new_slots[dest] = entry;
+          entry = rest;
+        }
+    }
+
+  free (old_slots);
+}
+
+/* Enlarge TABLE by a factor of HASH_REHASH_MULTIPLIER and redistribute
+   its entries with hash_rehash.  The table is left untouched when the
+   enlarged bucket count could not be represented as a positive int.
+   TABLE must not be null. */
+static void
+hash_grow (table)
+     HASH_TABLE *table;
+{
+  int nsize;
+
+  /* Test for overflow before multiplying.  Signed integer overflow is
+     undefined behavior, so inspecting the sign of the product afterwards
+     is unreliable: an optimizing compiler may assume it cannot happen. */
+  if (table->nbuckets <= 0 || table->nbuckets > INT_MAX / HASH_REHASH_MULTIPLIER)
+    return;
+
+  nsize = table->nbuckets * HASH_REHASH_MULTIPLIER;
+  hash_rehash (table, nsize);
+}
+
+/* Divide TABLE's bucket count by HASH_REHASH_MULTIPLIER (integer
+   division) and redistribute its entries with hash_rehash.  TABLE must
+   not be null. */
+static void
+hash_shrink (table)
+     HASH_TABLE *table;
+{
+  hash_rehash (table, table->nbuckets / HASH_REHASH_MULTIPLIER);
+}
+
+/* Return a newly-allocated copy of TABLE created with the same bucket
+   count.  Every chain is duplicated with copy_bucket_array, to which
+   CPDATA is passed through as the data copy function.  Returns NULL if
+   TABLE is null. */
+HASH_TABLE *
+hash_copy (table, cpdata)
+     HASH_TABLE *table;
+     sh_string_func_t *cpdata;
+{
+  HASH_TABLE *dup_table;
+  int slot;
+
+  if (table == 0)
+    return ((HASH_TABLE *)NULL);
+
+  dup_table = hash_create (table->nbuckets);
+
+  slot = 0;
+  while (slot < table->nbuckets)
+    {
+      dup_table->bucket_array[slot] = copy_bucket_array (table->bucket_array[slot], cpdata);
+      slot++;
+    }
+
+  dup_table->nentries = table->nentries;
+  return dup_table;
+}
+
+/* This is the best 32-bit string hash function I found.  It's one of the
+   Fowler-Noll-Vo family (FNV-1).
+
+   The magic is in the interesting relationship between the special prime
+   16777619 (2^24 + 403) and 2^32 and 2^8.
+
+   FNV_OFFSET is the value hash_string starts from.  FNV_PRIME records the
+   multiplier; hash_string performs the equivalent multiplication with
+   shifts and adds instead of using the macro. */
+
+#define FNV_OFFSET 2166136261
+#define FNV_PRIME 16777619
+
+/* If you want to use 64 bits, use
+FNV_OFFSET 14695981039346656037
+FNV_PRIME 1099511628211
+*/
+
+/* Return the hash of the NUL-terminated string S.  Starting from
+   FNV_OFFSET, each character is folded in by multiplying the running
+   value by the FNV prime and then XORing in the character (the
+   traditional FNV-1 order; FNV-1a would XOR first).
+
+   The `khash' checks elsewhere in this file require that strings that
+   compare equally with strcmp hash to the same value. */
+unsigned int
+hash_string (s)
+     const char *s;
+{
+  register unsigned int hv;
+  register const char *p;
+
+  hv = FNV_OFFSET;
+  p = s;
+  while (*p)
+    {
+      /* Shift-and-add form of hv *= FNV_PRIME:
+         1 + 2 + 16 + 128 + 256 + 2^24 == 16777619. */
+      hv += (hv<<1) + (hv<<4) + (hv<<7) + (hv<<8) + (hv<<24);
+      hv ^= *p++;
+    }
+
+  return hv;
+}
+
+/* Return the index of the bucket in TABLE whose chain should hold
+   STRING, as computed by HASH_BUCKET.  TABLE must not be null. */
+
+int
+hash_bucket (string, table)
+     const char *string;
+     HASH_TABLE *table;
+{
+  unsigned int hv;      /* receives the full hash value; not used further */
+  int slot;
+
+  slot = HASH_BUCKET (string, table, hv);
+  return slot;
+}
+
+/* Look up STRING in TABLE and return its entry, incrementing the entry's
+   times_found counter on a hit.  On a miss, a new entry is created and
+   returned if FLAGS contains HASH_CREATE (growing the table first when
+   HASH_SHOULDGROW says so); otherwise NULL is returned.  NULL is also
+   returned when TABLE is null.
+
+   A newly created entry has null data and a zero times_found count, and
+   its key is the caller's STRING pointer itself, not a copy. */
+BUCKET_CONTENTS *
+hash_search (string, table, flags)
+     const char *string;
+     HASH_TABLE *table;
+     int flags;
+{
+  BUCKET_CONTENTS *entry;
+  unsigned int hv;
+  int slot, creating;
+
+  if (table == 0)
+    return (BUCKET_CONTENTS *)NULL;
+
+  creating = (flags & HASH_CREATE) != 0;
+
+  /* Nothing can match in an empty table, so skip the hashing unless the
+     entry is going to be added anyway. */
+  if (creating == 0 && HASH_ENTRIES (table) == 0)
+    return (BUCKET_CONTENTS *)NULL;
+
+  slot = HASH_BUCKET (string, table, hv);
+
+  entry = table->bucket_array ? table->bucket_array[slot] : 0;
+  while (entry)
+    {
+      /* Check the cached hash value before comparing the strings. */
+      if (hv == entry->khash && STREQ (entry->key, string))
+        {
+          entry->times_found++;
+          return (entry);
+        }
+      entry = entry->next;
+    }
+
+  if (creating == 0)
+    return (BUCKET_CONTENTS *)NULL;
+
+  if (HASH_SHOULDGROW (table))
+    {
+      hash_grow (table);
+      /* The bucket count may have changed, so select the slot again. */
+      slot = HASH_BUCKET (string, table, hv);
+    }
+
+  entry = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
+  entry->key = (char *)string;  /* XXX fix later */
+  entry->data = NULL;
+  entry->khash = hv;
+  entry->times_found = 0;
+
+  /* Push the new entry onto the front of its chain. */
+  entry->next = table->bucket_array[slot];
+  table->bucket_array[slot] = entry;
+  table->nentries++;
+
+  return (entry);
+}
+
+/* Unlink the entry whose key matches STRING from TABLE and return it, so
+   the caller can free its contents; the table's entry count is
+   decremented.  Returns NULL if TABLE is null or empty, or if no entry
+   matches.  FLAGS is accepted but not examined. */
+BUCKET_CONTENTS *
+hash_remove (string, table, flags)
+     const char *string;
+     HASH_TABLE *table;
+     int flags;
+{
+  BUCKET_CONTENTS **linkp, *entry;
+  unsigned int hv;
+  int slot;
+
+  if (table == 0 || HASH_ENTRIES (table) == 0)
+    return (BUCKET_CONTENTS *)NULL;
+
+  slot = HASH_BUCKET (string, table, hv);
+
+  /* LINKP addresses whichever pointer currently leads to ENTRY: the
+     bucket slot for the first entry, the predecessor's next field after
+     that.  Unlinking is then a single store for both cases. */
+  for (linkp = &table->bucket_array[slot]; (entry = *linkp) != 0; linkp = &entry->next)
+    {
+      if (hv == entry->khash && STREQ (entry->key, string))
+        {
+          *linkp = entry->next;
+          table->nentries--;
+          return (entry);
+        }
+    }
+
+  return ((BUCKET_CONTENTS *) NULL);
+}
+
+/* Create an entry for STRING in TABLE and return it.  Unless FLAGS
+   contains HASH_NOSRCH, the table is searched first and an existing
+   entry is returned instead (hash_search increments its times_found
+   count).  A new entry has null data, a zero times_found count, and uses
+   the STRING pointer itself as its key.
+
+   If TABLE is null a table is created with hash_create (0), but this
+   function only returns the entry, so that table is not handed back to
+   the caller. */
+BUCKET_CONTENTS *
+hash_insert (string, table, flags)
+     char *string;
+     HASH_TABLE *table;
+     int flags;
+{
+  BUCKET_CONTENTS *entry;
+  unsigned int hv;
+  int slot;
+
+  if (table == 0)
+    table = hash_create (0);
+
+  if ((flags & HASH_NOSRCH) == 0)
+    {
+      entry = hash_search (string, table, 0);
+      if (entry)
+        return (entry);
+    }
+
+  /* Grow before choosing the slot, since growing changes the mask. */
+  if (HASH_SHOULDGROW (table))
+    hash_grow (table);
+
+  slot = HASH_BUCKET (string, table, hv);
+
+  entry = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
+  entry->key = string;
+  entry->data = NULL;
+  entry->khash = hv;
+  entry->times_found = 0;
+
+  /* Push the new entry onto the front of its chain. */
+  entry->next = table->bucket_array[slot];
+  table->bucket_array[slot] = entry;
+  table->nentries++;
+
+  return (entry);
+}
+
+/* Discard every entry in TABLE, leaving it empty but still usable.  Each
+   entry's data is released with FREE_DATA if that is non-null and with
+   free() otherwise; the key and the entry itself are always released
+   with free().  Does nothing if TABLE is null or has no entries. */
+void
+hash_flush (table, free_data)
+     HASH_TABLE *table;
+     sh_free_func_t *free_data;
+{
+  register BUCKET_CONTENTS *entry, *rest;
+  int slot;
+
+  if (table == 0 || HASH_ENTRIES (table) == 0)
+    return;
+
+  for (slot = 0; slot < table->nbuckets; slot++)
+    {
+      for (entry = table->bucket_array[slot]; entry; entry = rest)
+        {
+          /* Fetch the successor before ENTRY is released. */
+          rest = entry->next;
+
+          if (free_data)
+            (*free_data) (entry->data);
+          else
+            free (entry->data);
+          free (entry->key);
+          free (entry);
+        }
+      table->bucket_array[slot] = (BUCKET_CONTENTS *)NULL;
+    }
+
+  table->nentries = 0;
+}
+
+/* Free the hash table pointed to by TABLE: its bucket array and the table
+   structure itself.  The entries are not freed here; use hash_flush
+   beforehand to discard them.  A null TABLE is ignored, in line with the
+   null checks made by the other public functions in this file. */
+void
+hash_dispose (table)
+     HASH_TABLE *table;
+{
+  /* Guard the dereference below; callers may pass a table that was never
+     created. */
+  if (table == 0)
+    return;
+
+  free (table->bucket_array);
+  free (table);
+}
+
+/* Call FUNC on each entry of TABLE, bucket by bucket and in chain order
+   within a bucket.  The walk stops as soon as FUNC returns a negative
+   value.  Does nothing if TABLE is null or has no entries. */
+void
+hash_walk (table, func)
+     HASH_TABLE *table;
+     hash_wfunc *func;
+{
+  register int slot;
+  BUCKET_CONTENTS *entry;
+
+  if (table == 0 || HASH_ENTRIES (table) == 0)
+    return;
+
+  slot = 0;
+  while (slot < table->nbuckets)
+    {
+      entry = hash_items (slot, table);
+      while (entry)
+        {
+          if ((*func) (entry) < 0)
+            return;
+          entry = entry->next;
+        }
+      slot++;
+    }
+}
+
+#if defined (DEBUG) || defined (TEST_HASHING)
+/* Write statistics about TABLE to stderr: a summary line with the bucket
+   and entry counts, labelled NAME, followed by one line per bucket giving
+   the length of its chain, so the evenness of the distribution can be
+   judged.  A null NAME is replaced by "unknown hash table".  TABLE must
+   not be null. */
+void
+hash_pstats (table, name)
+     HASH_TABLE *table;
+     char *name;
+{
+  register int slot, chain_len;
+  register BUCKET_CONTENTS *entry;
+
+  if (name == 0)
+    name = "unknown hash table";
+
+  fprintf (stderr, "%s: %d buckets; %d items\n", name, table->nbuckets, table->nentries);
+
+  slot = 0;
+  while (slot < table->nbuckets)
+    {
+      entry = hash_items (slot, table);
+
+      fprintf (stderr, "\tslot %3d: ", slot);
+
+      chain_len = 0;
+      while (entry)
+        {
+          chain_len++;
+          entry = entry->next;
+        }
+
+      fprintf (stderr, "%d\n", chain_len);
+      slot++;
+    }
+}
+#endif
+
+#ifdef TEST_HASHING
+
+/* Stand-alone test driver, compiled only when TEST_HASHING is defined. */
+/* link with xmalloc.o and lib/malloc/libmalloc.a */
+#undef NULL
+#include <stdio.h>
+
+/* Provide a fallback definition in case the include above did not
+   restore NULL. */
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* TABLE is the table filled from stdin by main; NTABLE is the copy made
+   of it with hash_copy. */
+HASH_TABLE *table, *ntable;
+
+/* Not referenced in this file; presumably these satisfy references made
+   by the objects the test program is linked with -- TODO confirm. */
+int interrupt_immediately = 0;
+int running_trap = 0;
+
+/* Test-build stub: reports that no signal is trapped, whatever S is.
+   Not called from this file; presumably needed by the linked objects. */
+int
+signal_is_trapped (s)
+ int s;
+{
+ return (0);
+}
+
+/* Test-build stub: ignores FORMAT and any further arguments and simply
+   aborts the process. */
+void
+programming_error (const char *format, ...)
+{
+ abort();
+}
+
+/* Test-build stub: ignores FORMAT and any further arguments and simply
+   aborts the process. */
+void
+fatal_error (const char *format, ...)
+{
+ abort();
+}
+
+/* Test-build stub: warnings are silently discarded; FORMAT and any
+   further arguments are ignored. */
+void
+internal_warning (const char *format, ...)
+{
+}
+
+/* Test driver.  Read lines from stdin until end of input or an empty
+   string, and insert each one into TABLE as a key (fgets keeps the
+   trailing newline, so it is part of the key).  A line that was already
+   present is reported on stderr and its duplicate string is freed.
+   Afterwards print the table statistics, copy the table, flush the
+   original and print the statistics of the copy.  The table holds
+   TEST_NBUCKETS buckets when that macro is defined and the default
+   number otherwise. */
+int
+main ()
+{
+  char line[256];
+  char *key;
+  int count;
+  BUCKET_CONTENTS *entry;
+
+  count = 0;
+
+#if defined (TEST_NBUCKETS)
+  table = hash_create (TEST_NBUCKETS);
+#else
+  table = hash_create (0);
+#endif
+
+  while (fgets (line, sizeof (line), stdin) != 0 && *line)
+    {
+      key = savestring (line);
+      entry = hash_insert (key, table, 0);
+
+      /* A fresh entry starts with times_found == 0; a non-zero count means
+         hash_insert found an existing entry and KEY was not stored. */
+      if (entry->times_found == 0)
+        {
+          count++;
+          continue;
+        }
+
+      fprintf (stderr, "You have already added item `%s'\n", line);
+      free (key);
+    }
+
+  hash_pstats (table, "hash test");
+
+  ntable = hash_copy (table, (sh_string_func_t *)NULL);
+  hash_flush (table, (sh_free_func_t *)NULL);
+  hash_pstats (ntable, "hash copy test");
+
+  exit (0);
+}
+
+#endif /* TEST_HASHING */