diff options
Diffstat (limited to 'src/rocksdb/util/hash.cc')
-rw-r--r-- | src/rocksdb/util/hash.cc | 83 |
1 file changed, 83 insertions, 0 deletions
diff --git a/src/rocksdb/util/hash.cc b/src/rocksdb/util/hash.cc new file mode 100644 index 000000000..d72be8a45 --- /dev/null +++ b/src/rocksdb/util/hash.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include <string.h> +#include "util/coding.h" +#include "util/hash.h" +#include "util/util.h" +#include "util/xxhash.h" + +namespace ROCKSDB_NAMESPACE { + +uint32_t Hash(const char* data, size_t n, uint32_t seed) { + // MurmurHash1 - fast but mediocre quality + // https://github.com/aappleby/smhasher/wiki/MurmurHash1 + // + const uint32_t m = 0xc6a4a793; + const uint32_t r = 24; + const char* limit = data + n; + uint32_t h = static_cast<uint32_t>(seed ^ (n * m)); + + // Pick up four bytes at a time + while (data + 4 <= limit) { + uint32_t w = DecodeFixed32(data); + data += 4; + h += w; + h *= m; + h ^= (h >> 16); + } + + // Pick up remaining bytes + switch (limit - data) { + // Note: The original hash implementation used data[i] << shift, which + // promotes the char to int and then performs the shift. If the char is + // negative, the shift is undefined behavior in C++. The hash algorithm is + // part of the format definition, so we cannot change it; to obtain the same + // behavior in a legal way we just cast to uint32_t, which will do + // sign-extension. To guarantee compatibility with architectures where chars + // are unsigned we first cast the char to int8_t. 
+ case 3: + h += static_cast<uint32_t>(static_cast<int8_t>(data[2])) << 16; + FALLTHROUGH_INTENDED; + case 2: + h += static_cast<uint32_t>(static_cast<int8_t>(data[1])) << 8; + FALLTHROUGH_INTENDED; + case 1: + h += static_cast<uint32_t>(static_cast<int8_t>(data[0])); + h *= m; + h ^= (h >> r); + break; + } + return h; +} + +// We are standardizing on a preview release of XXH3, because that's +// the best available at time of standardizing. +// +// In testing (mostly Intel Skylake), this hash function is much more +// thorough than Hash32 and is almost universally faster. Hash() only +// seems faster when passing runtime-sized keys of the same small size +// (less than about 24 bytes) thousands of times in a row; this seems +// to allow the branch predictor to work some magic. XXH3's speed is +// much less dependent on branch prediction. +// +// Hashing with a prefix extractor is potentially a common case of +// hashing objects of small, predictable size. We could consider +// bundling hash functions specialized for particular lengths with +// the prefix extractors. +uint64_t Hash64(const char* data, size_t n, uint64_t seed) { + return XXH3p_64bits_withSeed(data, n, seed); +} + +uint64_t Hash64(const char* data, size_t n) { + // Same as seed = 0 + return XXH3p_64bits(data, n); +} + +} // namespace ROCKSDB_NAMESPACE |