summaryrefslogtreecommitdiffstats
path: root/tests/exp/test_hashes.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-13 12:18:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-13 12:18:05 +0000
commitb46aad6df449445a9fc4aa7b32bd40005438e3f7 (patch)
tree751aa858ca01f35de800164516b298887382919d /tests/exp/test_hashes.c
parentInitial commit. (diff)
downloadhaproxy-b46aad6df449445a9fc4aa7b32bd40005438e3f7.tar.xz
haproxy-b46aad6df449445a9fc4aa7b32bd40005438e3f7.zip
Adding upstream version 2.9.5.upstream/2.9.5
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/exp/test_hashes.c')
-rw-r--r--tests/exp/test_hashes.c559
1 files changed, 559 insertions, 0 deletions
diff --git a/tests/exp/test_hashes.c b/tests/exp/test_hashes.c
new file mode 100644
index 0000000..39cb965
--- /dev/null
+++ b/tests/exp/test_hashes.c
@@ -0,0 +1,559 @@
+/*
+ This file only show how many operations a hash is able to handle.
+ It don't show the distribution nor collisions.
+
+ gcc -Wall -O3 -o test_hashes test_hashes.c
+ ./test_hashes |sort -k 3 -r
+ */
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+//#include <stdint.h>
+
+
+static struct timeval timeval_current(void)
+{
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return tv;
+}
+
+static double timeval_elapsed(struct timeval *tv)
+{
+ struct timeval tv2 = timeval_current();
+ return (tv2.tv_sec - tv->tv_sec) +
+ (tv2.tv_usec - tv->tv_usec)*1.0e-6;
+}
+
+#define HAPROXY_BACKENDS 4
+
+unsigned long haproxy_uri_hash(char *uri, int uri_len){
+
+ unsigned long hash = 0;
+ int c;
+
+ while (uri_len--) {
+ c = *uri++;
+ if (c == '?')
+ break;
+ hash = c + (hash << 6) + (hash << 16) - hash;
+ }
+
+ return hash%HAPROXY_BACKENDS; /* I assume 4 active backends */
+} /* end haproxy_hash() */
+
+/*
+ * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
+ */
+unsigned sax_hash ( void *key, int len )
+{
+ unsigned char *p = key;
+ unsigned h = 0;
+ int i;
+
+ for ( i = 0; i < len; i++ )
+ h ^= ( h << 5 ) + ( h >> 2 ) + p[i];
+
+ return h;
+}
+
+#include <arpa/inet.h>
+/* len 4 for ipv4 and 16 for ipv6 */
+unsigned int haproxy_server_hash(const char *addr, int len){
+ unsigned int h, l;
+ l = h = 0;
+
+ while ((l + sizeof (int)) <= len) {
+ h ^= ntohl(*(unsigned int *)(&addr[l]));
+ l += sizeof (int);
+ }
+ return h %= HAPROXY_BACKENDS;
+}/* end haproxy_server_hash() */
+
+
+int hashpjw(const void *key) {
+
+ const char *ptr;
+ unsigned int val;
+ /*********************************************************************
+ * *
+ * Hash the key by performing a number of bit operations on it. *
+ * *
+ *********************************************************************/
+
+ val = 0;
+ ptr = key;
+
+ while (*ptr != '\0') {
+
+ int tmp;
+
+ val = (val << 4) + (*ptr);
+
+ if((tmp = (val & 0xf0000000))) {
+ val = val ^ (tmp >> 24);
+ val = val ^ tmp;
+ }
+ ptr++;
+ }/* end while */
+
+ return val;
+}/* end hashpjw */
+
+static unsigned long
+hash_djbx33(
+ register unsigned char *key,
+ register size_t len)
+{
+ register unsigned long hash = 5381;
+
+ /* the hash unrolled eight times */
+ for (; len >= 8; len -= 8) {
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ }
+ switch (len) {
+ case 7: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 6: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 5: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 4: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 3: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 2: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
+ case 1: hash = ((hash << 5) + hash) + *key++; break;
+ default: /* case 0: */ break;
+ }
+ return hash;
+}
+
+typedef unsigned long int ub4; /* unsigned 4-byte quantities */
+typedef unsigned char ub1; /* unsigned 1-byte quantities */
+
+ub4 bernstein(ub1 *key, ub4 len, ub4 level){
+ ub4 hash = level;
+ ub4 i;
+ for (i=0; i<len; ++i) hash = 33*hash + key[i];
+ return hash;
+}
+
+/*
+ * http://www.azillionmonkeys.com/qed/hash.html
+ */
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+ || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+ +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+
+uint32_t SuperFastHash (const char * data, int len) {
+uint32_t hash = len, tmp;
+int rem;
+
+ if (len <= 0 || data == NULL) return 0;
+
+ rem = len & 3;
+ len >>= 2;
+
+ /* Main loop */
+ for (;len > 0; len--) {
+ hash += get16bits (data);
+ tmp = (get16bits (data+2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2*sizeof (uint16_t);
+ hash += hash >> 11;
+ }
+
+ /* Handle end cases */
+ switch (rem) {
+ case 3: hash += get16bits (data);
+ hash ^= hash << 16;
+ hash ^= data[sizeof (uint16_t)] << 18;
+ hash += hash >> 11;
+ break;
+ case 2: hash += get16bits (data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1: hash += *data;
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ }
+
+ /* Force "avalanching" of final 127 bits */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
+
+/*
+ * This variant is about 15% faster.
+ */
+uint32_t SuperFastHash2 (const char * data, int len) {
+uint32_t hash = len, tmp;
+int rem;
+
+ if (len <= 0 || data == NULL) return 0;
+
+ rem = len & 3;
+ len >>= 2;
+
+ /* Main loop */
+ for (;len > 0; len--) {
+ register uint32_t next;
+ next = get16bits(data+2);
+ hash += get16bits(data);
+ tmp = (next << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2*sizeof (uint16_t);
+ hash += hash >> 11;
+ }
+
+ /* Handle end cases */
+ switch (rem) {
+ case 3: hash += get16bits (data);
+ hash ^= hash << 16;
+ hash ^= data[sizeof (uint16_t)] << 18;
+ hash += hash >> 11;
+ break;
+ case 2: hash += get16bits (data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1: hash += *data;
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ }
+
+ /* Force "avalanching" of final 127 bits */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
+
+/*
+ * 32 bit FNV-0 hash type
+ */
+typedef unsigned long Fnv32_t;
+
+/*
+ * fnv_32a_str - perform a 32 bit Fowler/Noll/Vo FNV-1a hash on a string
+ *
+ * input:
+ * str - string to hash
+ * hval - previous hash value or 0 if first call
+ *
+ * returns:
+ * 32 bit hash as a static hash type
+ *
+ * NOTE: To use the recommended 32 bit FNV-1a hash, use FNV1_32A_INIT as the
+ * hval arg on the first call to either fnv_32a_buf() or fnv_32a_str().
+ */
+Fnv32_t
+fnv_32a_str(char *str, Fnv32_t hval)
+{
+ unsigned char *s = (unsigned char *)str; /* unsigned string */
+
+ /*
+ * FNV-1a hash each octet in the buffer
+ */
+ while (*s) {
+
+ /* xor the bottom with the current octet */
+ hval ^= (Fnv32_t)*s++;
+
+/* #define NO_FNV_GCC_OPTIMIZATION */
+ /* multiply by the 32 bit FNV magic prime mod 2^32 */
+#if defined(NO_FNV_GCC_OPTIMIZATION)
+ /*
+ * 32 bit magic FNV-1a prime
+ */
+#define FNV_32_PRIME ((Fnv32_t)0x01000193)
+ hval *= FNV_32_PRIME;
+#else
+ hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
+#endif
+ }
+
+ /* return our new hash value */
+ return hval;
+}
+
+/*
+ * from lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ */
+
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+/*
+-------------------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+
+This is reversible, so any information in (a,b,c) before mix() is
+still in (a,b,c) after mix().
+
+If four pairs of (a,b,c) inputs are run through mix(), or through
+mix() in reverse, there are at least 32 bits of the output that
+are sometimes the same for one pair and different for another pair.
+This was tested for:
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
+satisfy this are
+ 4 6 8 16 19 4
+ 9 15 3 18 27 15
+ 14 9 3 7 17 3
+Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
+for "differ" defined as + with a one-bit base and a two-bit delta. I
+used http://burtleburtle.net/bob/hash/avalanche.html to choose
+the operations, constants, and arrangements of the variables.
+
+This does not achieve avalanche. There are input bits of (a,b,c)
+that fail to affect some output bits of (a,b,c), especially of a. The
+most thoroughly mixed value is c, but it doesn't really even achieve
+avalanche in c.
+
+This allows some parallelism. Read-after-writes are good at doubling
+the number of bits affected, so the goal of mixing pulls in the opposite
+direction as the goal of parallelism. I did what I could. Rotates
+seem to cost as much as shifts on every machine I could lay my hands
+on, and rotates are much kinder to the top and bottom bits, so I used
+rotates.
+-------------------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+}
+
+/*
+-------------------------------------------------------------------------------
+final -- final mixing of 3 32-bit values (a,b,c) into c
+
+Pairs of (a,b,c) values differing in only a few bits will usually
+produce values of c that look totally different. This was tested for
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+These constants passed:
+ 14 11 25 16 4 14 24
+ 12 14 25 16 4 14 24
+and these came close:
+ 4 8 15 26 3 22 24
+ 10 8 15 26 3 22 24
+ 11 8 15 26 3 22 24
+-------------------------------------------------------------------------------
+*/
+#define final(a,b,c) \
+{ \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c,4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+}
+
+/*
+--------------------------------------------------------------------
+ This works on all machines. To be useful, it requires
+ -- that the key be an array of uint32_t's, and
+ -- that the length be the number of uint32_t's in the key
+
+ The function hashword() is identical to hashlittle() on little-endian
+ machines, and identical to hashbig() on big-endian machines,
+ except that the length has to be measured in uint32_ts rather than in
+ bytes. hashlittle() is more complicated than hashword() only because
+ hashlittle() has to dance around fitting the key bytes into registers.
+--------------------------------------------------------------------
+*/
+uint32_t hashword(
+const uint32_t *k, /* the key, an array of uint32_t values */
+size_t length, /* the length of the key, in uint32_ts */
+uint32_t initval) /* the previous hash, or an arbitrary value */
+{
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch(length) /* all the case statements fall through */
+ {
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ return c;
+}
+
+/* from K&R book site 139 */
+#define HASHSIZE 101
+
+unsigned kr_hash(char *s){
+ unsigned hashval;
+
+ for(hashval = 0; *s != '\0';s++)
+ hashval = *s + 31 * hashval;
+
+ return hashval % HASHSIZE;
+
+} /* end kr_hash() */
+
+unsigned fnv_hash ( void *key, int len )
+{
+ unsigned char *p = key;
+ unsigned h = 2166136261;
+ int i;
+
+ for ( i = 0; i < len; i++ )
+ h = ( h * 16777619 ) ^ p[i];
+
+ return h;
+}
+
+unsigned oat_hash ( void *key, int len )
+{
+ unsigned char *p = key;
+ unsigned h = 0;
+ int i;
+
+ for ( i = 0; i < len; i++ ) {
+ h += p[i];
+ h += ( h << 10 );
+ h ^= ( h >> 6 );
+ }
+
+ h += ( h << 3 );
+ h ^= ( h >> 11 );
+ h += ( h << 15 );
+
+ return h;
+}
+
+unsigned wt_hash ( void *key, int len )
+{
+ unsigned char *p = key;
+ unsigned h = 0x783c965aUL;
+ unsigned step = 16;
+
+ for (; len > 0; len--) {
+ h ^= *p * 9;
+ p++;
+ h = (h << step) | (h >> (32-step));
+ step ^= h;
+ step &= 0x1F;
+ }
+
+ return h;
+}
+
+
+#define run_test(fct, args) { \
+ unsigned long loop, count; \
+ volatile unsigned long result; \
+ double delta; \
+ struct timeval tv; \
+ fprintf(stderr, "Starting %s\n", #fct); \
+ tv = timeval_current(); \
+ count = 0; \
+ do { \
+ delta = timeval_elapsed(&tv); \
+ for (loop = 0; loop < 1000; loop++) { \
+ result = fct args; \
+ count++; \
+ } \
+ } while (delta < 1.0); \
+ fprintf(stdout, "%-20s : %10.0f run/sec\n", #fct, count/delta); \
+ fflush(stdout); \
+}
+
+int main(){
+
+ char **start;
+ int len;
+
+ char *urls[] = {
+ "http://www.microsoft.com/shared/core/1/webservice/navigation.asmx/DisplayDownlevelNavHtml",
+ NULL
+ };
+
+ start = urls;
+ len = strlen(*urls);
+
+ run_test(wt_hash, (*urls, len));
+ run_test(SuperFastHash2, (*urls, len));
+ run_test(SuperFastHash, (*urls, len));
+ run_test(haproxy_uri_hash, (*urls, len));
+ run_test(haproxy_server_hash, (*urls, len));
+ run_test(hashpjw, (*urls));
+ run_test(hash_djbx33, ((unsigned char *)*urls, len));
+ run_test(bernstein, ((unsigned char *)*urls, len, 4));
+ run_test(fnv_32a_str, (*urls, 0));
+ run_test(hashword, ((const uint32_t *)*urls,strlen(*urls),0));
+ run_test(kr_hash, (*urls));
+ run_test(sax_hash, (*urls, len));
+ run_test(fnv_hash, (*urls, len));
+ run_test(oat_hash, (*urls, len));
+
+ return 0;
+
+}/* end main() */