diff options
Diffstat (limited to '')
-rw-r--r-- | src/civetweb/src/third_party/duktape-1.5.2/src-separate/duk_heap_hashstring.c | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/src/civetweb/src/third_party/duktape-1.5.2/src-separate/duk_heap_hashstring.c b/src/civetweb/src/third_party/duktape-1.5.2/src-separate/duk_heap_hashstring.c new file mode 100644 index 000000000..5cad7958e --- /dev/null +++ b/src/civetweb/src/third_party/duktape-1.5.2/src-separate/duk_heap_hashstring.c @@ -0,0 +1,120 @@ +/* + * String hash computation (interning). + * + * String hashing is performance critical because a string hash is computed + * for all new strings which are candidates to be added to the string table. + * However, strings actually added to the string table go through a codepoint + * length calculation which dominates performance because it goes through + * every byte of the input string (but only for strings added). + * + * The string hash algorithm should be fast, but on the other hand provide + * good enough hashes to ensure both string table and object property table + * hash tables work reasonably well (i.e., there aren't too many collisions + * with real world inputs). Unless the hash is cryptographic, it's always + * possible to craft inputs with maximal hash collisions. + * + * NOTE: The hash algorithms must match src/dukutil.py:duk_heap_hashstring() + * for ROM string support! + */ + +#include "duk_internal.h" + +#if defined(DUK_USE_STRHASH_DENSE) +/* Constants for duk_hashstring(). */ +#define DUK__STRHASH_SHORTSTRING 4096L +#define DUK__STRHASH_MEDIUMSTRING (256L * 1024L) +#define DUK__STRHASH_BLOCKSIZE 256L + +DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap *heap, const duk_uint8_t *str, duk_size_t len) { + duk_uint32_t hash; + + /* Use Murmurhash2 directly for short strings, and use "block skipping" + * for long strings: hash an initial part and then sample the rest of + * the string with reasonably sized chunks. An initial offset for the + * sampling is computed based on a hash of the initial part of the string; + * this is done to (usually) avoid the case where all long strings have + * certain offset ranges which are never sampled. + * + * Skip should depend on length and bound the total time to roughly + * logarithmic. With current values: + * + * 1M string => 256 * 241 = 61696 bytes (0.06M) of hashing + * 1G string => 256 * 16321 = 4178176 bytes (3.98M) of hashing + * + * XXX: It would be better to compute the skip offset more "smoothly" + * instead of having a few boundary values. + */ + + /* note: mixing len into seed improves hashing when skipping */ + duk_uint32_t str_seed = heap->hash_seed ^ ((duk_uint32_t) len); + + if (len <= DUK__STRHASH_SHORTSTRING) { + hash = duk_util_hashbytes(str, len, str_seed); + } else { + duk_size_t off; + duk_size_t skip; + + if (len <= DUK__STRHASH_MEDIUMSTRING) { + skip = (duk_size_t) (16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE); + } else { + skip = (duk_size_t) (256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE); + } + + hash = duk_util_hashbytes(str, (duk_size_t) DUK__STRHASH_SHORTSTRING, str_seed); + off = DUK__STRHASH_SHORTSTRING + (skip * (hash % 256)) / 256; + + /* XXX: inefficient loop */ + while (off < len) { + duk_size_t left = len - off; + duk_size_t now = (duk_size_t) (left > DUK__STRHASH_BLOCKSIZE ? DUK__STRHASH_BLOCKSIZE : left); + hash ^= duk_util_hashbytes(str + off, now, str_seed); + off += skip; + } + } + +#if defined(DUK_USE_STRHASH16) + /* Truncate to 16 bits here, so that a computed hash can be compared + * against a hash stored in a 16-bit field. + */ + hash &= 0x0000ffffUL; +#endif + return hash; +} + +#undef DUK__STRHASH_SHORTSTRING +#undef DUK__STRHASH_MEDIUMSTRING +#undef DUK__STRHASH_BLOCKSIZE +#else /* DUK_USE_STRHASH_DENSE */ +DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap *heap, const duk_uint8_t *str, duk_size_t len) { + duk_uint32_t hash; + duk_size_t step; + duk_size_t off; + + /* Slightly modified "Bernstein hash" from: + * + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx + * + * Modifications: string skipping and reverse direction similar to + * Lua 5.1.5, and different hash initializer. + * + * The reverse direction ensures last byte it always included in the + * hash which is a good default as changing parts of the string are + * more often in the suffix than in the prefix. + */ + + hash = heap->hash_seed ^ ((duk_uint32_t) len); /* Bernstein hash init value is normally 5381 */ + step = (len >> DUK_USE_STRHASH_SKIP_SHIFT) + 1; + for (off = len; off >= step; off -= step) { + DUK_ASSERT(off >= 1); /* off >= step, and step >= 1 */ + hash = (hash * 33) + str[off - 1]; + } + +#if defined(DUK_USE_STRHASH16) + /* Truncate to 16 bits here, so that a computed hash can be compared + * against a hash stored in a 16-bit field. + */ + hash &= 0x0000ffffUL; +#endif + return hash; +} +#endif /* DUK_USE_STRHASH_DENSE */ |