diff options
Diffstat (limited to '')
-rw-r--r-- | src/spookyhash/SpookyV2.cpp | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/src/spookyhash/SpookyV2.cpp b/src/spookyhash/SpookyV2.cpp new file mode 100644 index 0000000..f1aea49 --- /dev/null +++ b/src/spookyhash/SpookyV2.cpp @@ -0,0 +1,350 @@ +// Spooky Hash +// A 128-bit noncryptographic hash, for checksums and table lookup +// By Bob Jenkins. Public domain. +// Oct 31 2010: published framework, disclaimer ShortHash isn't right +// Nov 7 2010: disabled ShortHash +// Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again +// April 10 2012: buffer overflow on platforms without unaligned reads +// July 12 2012: was passing out variables in final to in/out in short +// July 30 2012: I reintroduced the buffer overflow +// August 5 2012: SpookyV2: d = should be d += in short hash, and remove extra mix from long hash + +#include <memory.h> +#include "SpookyV2.h" + +#define ALLOW_UNALIGNED_READS 1 + +// +// short hash ... it could be used on any message, +// but it's used by Spooky just for short messages. +// +void SpookyHash::Short( + const void *message, + size_t length, + uint64 *hash1, + uint64 *hash2) +{ + uint64 buf[2*sc_numVars]; + union + { + const uint8 *p8; + uint32 *p32; + uint64 *p64; + size_t i; + } u; + + u.p8 = (const uint8 *)message; + + if (!ALLOW_UNALIGNED_READS && (u.i & 0x7)) + { + memcpy(buf, message, length); + u.p64 = buf; + } + + size_t remainder = length%32; + uint64 a=*hash1; + uint64 b=*hash2; + uint64 c=sc_const; + uint64 d=sc_const; + + if (length > 15) + { + const uint64 *end = u.p64 + (length/32)*4; + + // handle all complete sets of 32 bytes + for (; u.p64 < end; u.p64 += 4) + { + c += SPOOKYHASH_LITTLE_ENDIAN_64(u.p64[0]); + d += SPOOKYHASH_LITTLE_ENDIAN_64(u.p64[1]); + ShortMix(a,b,c,d); + a += SPOOKYHASH_LITTLE_ENDIAN_64(u.p64[2]); + b += SPOOKYHASH_LITTLE_ENDIAN_64(u.p64[3]); + } + + //Handle the case of 16+ remaining bytes. + if (remainder >= 16) + { + c += SPOOKYHASH_LITTLE_ENDIAN_64(u.p64[0]); + d += SPOOKYHASH_LITTLE_ENDIAN_64(u.p64[1]); + ShortMix(a,b,c,d); + u.p64 += 2; + remainder -= 16; + } + } + + // Handle the last 0..15 bytes, and its length + d += ((uint64)length) << 56; + switch (remainder) + { + case 15: + d += ((uint64)u.p8[14]) << 48; + case 14: + d += ((uint64)u.p8[13]) << 40; + case 13: + d += ((uint64)u.p8[12]) << 32; + case 12: + d += SPOOKYHASH_LITTLE_ENDIAN_32(u.p32[2]); + c += SPOOKYHASH_LITTLE_ENDIAN_64(u.p64[0]); + break; + case 11: + d += ((uint64)u.p8[10]) << 16; + case 10: + d += ((uint64)u.p8[9]) << 8; + case 9: + d += (uint64)u.p8[8]; + case 8: + c += SPOOKYHASH_LITTLE_ENDIAN_64(u.p64[0]); + break; + case 7: + c += ((uint64)u.p8[6]) << 48; + case 6: + c += ((uint64)u.p8[5]) << 40; + case 5: + c += ((uint64)u.p8[4]) << 32; + case 4: + c += SPOOKYHASH_LITTLE_ENDIAN_32(u.p32[0]); + break; + case 3: + c += ((uint64)u.p8[2]) << 16; + case 2: + c += ((uint64)u.p8[1]) << 8; + case 1: + c += (uint64)u.p8[0]; + break; + case 0: + c += sc_const; + d += sc_const; + } + ShortEnd(a,b,c,d); + *hash1 = a; + *hash2 = b; +} + + + + +// do the whole hash in one call +void SpookyHash::Hash128( + const void *message, + size_t length, + uint64 *hash1, + uint64 *hash2) +{ + if (length < sc_bufSize) + { + Short(message, length, hash1, hash2); + return; + } + + uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; + uint64 buf[sc_numVars]; + uint64 *end; + union + { + const uint8 *p8; + uint64 *p64; + size_t i; + } u; + size_t remainder; + + h0=h3=h6=h9 = *hash1; + h1=h4=h7=h10 = *hash2; + h2=h5=h8=h11 = sc_const; + + u.p8 = (const uint8 *)message; + end = u.p64 + (length/sc_blockSize)*sc_numVars; + + // handle all whole sc_blockSize blocks of bytes + if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0)) + { + while (u.p64 < end) + { + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + else + { + while (u.p64 < end) + { + memcpy(buf, u.p64, sc_blockSize); + Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + + // handle the last partial block of sc_blockSize bytes + remainder = (length - ((const uint8 *)end-(const uint8 *)message)); + memcpy(buf, end, remainder); + memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder); + ((uint8 *)buf)[sc_blockSize-1] = remainder; + + // do some final mixing + End(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + *hash1 = h0; + *hash2 = h1; +} + + + +// init spooky state +void SpookyHash::Init(uint64 seed1, uint64 seed2) +{ + m_length = 0; + m_remainder = 0; + m_state[0] = seed1; + m_state[1] = seed2; +} + + +// add a message fragment to the state +void SpookyHash::Update(const void *message, size_t length) +{ + uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; + size_t newLength = length + m_remainder; + uint8 remainder; + union + { + const uint8 *p8; + uint64 *p64; + size_t i; + } u; + const uint64 *end; + + // Is this message fragment too short? If it is, stuff it away. + if (newLength < sc_bufSize) + { + memcpy(&((uint8 *)m_data)[m_remainder], message, length); + m_length = length + m_length; + m_remainder = (uint8)newLength; + return; + } + + // init the variables + if (m_length < sc_bufSize) + { + h0=h3=h6=h9 = m_state[0]; + h1=h4=h7=h10 = m_state[1]; + h2=h5=h8=h11 = sc_const; + } + else + { + h0 = m_state[0]; + h1 = m_state[1]; + h2 = m_state[2]; + h3 = m_state[3]; + h4 = m_state[4]; + h5 = m_state[5]; + h6 = m_state[6]; + h7 = m_state[7]; + h8 = m_state[8]; + h9 = m_state[9]; + h10 = m_state[10]; + h11 = m_state[11]; + } + m_length = length + m_length; + + // if we've got anything stuffed away, use it now + if (m_remainder) + { + uint8 prefix = sc_bufSize-m_remainder; + memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix); + u.p64 = m_data; + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p8 = ((const uint8 *)message) + prefix; + length -= prefix; + } + else + { + u.p8 = (const uint8 *)message; + } + + // handle all whole blocks of sc_blockSize bytes + end = u.p64 + (length/sc_blockSize)*sc_numVars; + remainder = (uint8)(length-((const uint8 *)end-u.p8)); + if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0) + { + while (u.p64 < end) + { + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + else + { + while (u.p64 < end) + { + memcpy(m_data, u.p8, sc_blockSize); + Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + + // stuff away the last few bytes + m_remainder = remainder; + memcpy(m_data, end, remainder); + + // stuff away the variables + m_state[0] = h0; + m_state[1] = h1; + m_state[2] = h2; + m_state[3] = h3; + m_state[4] = h4; + m_state[5] = h5; + m_state[6] = h6; + m_state[7] = h7; + m_state[8] = h8; + m_state[9] = h9; + m_state[10] = h10; + m_state[11] = h11; +} + +// report the hash for the concatenation of all message fragments so far +void +SpookyHash::Final(uint64* hash1, uint64* hash2) const +{ + // init the variables + if (m_length < sc_bufSize) { + *hash1 = m_state[0]; + *hash2 = m_state[1]; + Short(m_data, m_length, hash1, hash2); + return; + } + + const uint64 *data = (const uint64 *)m_data; + uint8 remainder = m_remainder; + + uint64 h0 = m_state[0]; + uint64 h1 = m_state[1]; + uint64 h2 = m_state[2]; + uint64 h3 = m_state[3]; + uint64 h4 = m_state[4]; + uint64 h5 = m_state[5]; + uint64 h6 = m_state[6]; + uint64 h7 = m_state[7]; + uint64 h8 = m_state[8]; + uint64 h9 = m_state[9]; + uint64 h10 = m_state[10]; + uint64 h11 = m_state[11]; + + if (remainder >= sc_blockSize) + { + // m_data can contain two blocks; handle any whole first block + Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + data += sc_numVars; + remainder -= sc_blockSize; + } + + // mix in the last partial block, and the length mod sc_blockSize + memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder)); + + ((uint8 *)data)[sc_blockSize-1] = remainder; + + // do some final mixing + End(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + + *hash1 = h0; + *hash2 = h1; +} + |