diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/rocksdb/util/hash.cc | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rocksdb/util/hash.cc')
-rw-r--r-- | src/rocksdb/util/hash.cc | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/src/rocksdb/util/hash.cc b/src/rocksdb/util/hash.cc new file mode 100644 index 000000000..0f7f2edc1 --- /dev/null +++ b/src/rocksdb/util/hash.cc @@ -0,0 +1,201 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/hash.h" + +#include <string> + +#include "port/lang.h" +#include "util/coding.h" +#include "util/hash128.h" +#include "util/math128.h" +#include "util/xxhash.h" +#include "util/xxph3.h" + +namespace ROCKSDB_NAMESPACE { + +uint64_t (*kGetSliceNPHash64UnseededFnPtr)(const Slice&) = &GetSliceHash64; + +uint32_t Hash(const char* data, size_t n, uint32_t seed) { + // MurmurHash1 - fast but mediocre quality + // https://github.com/aappleby/smhasher/wiki/MurmurHash1 + // + const uint32_t m = 0xc6a4a793; + const uint32_t r = 24; + const char* limit = data + n; + uint32_t h = static_cast<uint32_t>(seed ^ (n * m)); + + // Pick up four bytes at a time + while (data + 4 <= limit) { + uint32_t w = DecodeFixed32(data); + data += 4; + h += w; + h *= m; + h ^= (h >> 16); + } + + // Pick up remaining bytes + switch (limit - data) { + // Note: The original hash implementation used data[i] << shift, which + // promotes the char to int and then performs the shift. If the char is + // negative, the shift is undefined behavior in C++. The hash algorithm is + // part of the format definition, so we cannot change it; to obtain the same + // behavior in a legal way we just cast to uint32_t, which will do + // sign-extension. To guarantee compatibility with architectures where chars + // are unsigned we first cast the char to int8_t. + case 3: + h += static_cast<uint32_t>(static_cast<int8_t>(data[2])) << 16; + FALLTHROUGH_INTENDED; + case 2: + h += static_cast<uint32_t>(static_cast<int8_t>(data[1])) << 8; + FALLTHROUGH_INTENDED; + case 1: + h += static_cast<uint32_t>(static_cast<int8_t>(data[0])); + h *= m; + h ^= (h >> r); + break; + } + return h; +} + +// We are standardizing on a preview release of XXH3, because that's +// the best available at time of standardizing. +// +// In testing (mostly Intel Skylake), this hash function is much more +// thorough than Hash32 and is almost universally faster. Hash() only +// seems faster when passing runtime-sized keys of the same small size +// (less than about 24 bytes) thousands of times in a row; this seems +// to allow the branch predictor to work some magic. XXH3's speed is +// much less dependent on branch prediction. +// +// Hashing with a prefix extractor is potentially a common case of +// hashing objects of small, predictable size. We could consider +// bundling hash functions specialized for particular lengths with +// the prefix extractors. +uint64_t Hash64(const char* data, size_t n, uint64_t seed) { + return XXPH3_64bits_withSeed(data, n, seed); +} + +uint64_t Hash64(const char* data, size_t n) { + // Same as seed = 0 + return XXPH3_64bits(data, n); +} + +uint64_t GetSlicePartsNPHash64(const SliceParts& data, uint64_t seed) { + // TODO(ajkr): use XXH3 streaming APIs to avoid the copy/allocation. + size_t concat_len = 0; + for (int i = 0; i < data.num_parts; ++i) { + concat_len += data.parts[i].size(); + } + std::string concat_data; + concat_data.reserve(concat_len); + for (int i = 0; i < data.num_parts; ++i) { + concat_data.append(data.parts[i].data(), data.parts[i].size()); + } + assert(concat_data.size() == concat_len); + return NPHash64(concat_data.data(), concat_len, seed); +} + +Unsigned128 Hash128(const char* data, size_t n, uint64_t seed) { + auto h = XXH3_128bits_withSeed(data, n, seed); + return (Unsigned128{h.high64} << 64) | (h.low64); +} + +Unsigned128 Hash128(const char* data, size_t n) { + // Same as seed = 0 + auto h = XXH3_128bits(data, n); + return (Unsigned128{h.high64} << 64) | (h.low64); +} + +void Hash2x64(const char* data, size_t n, uint64_t* high64, uint64_t* low64) { + // Same as seed = 0 + auto h = XXH3_128bits(data, n); + *high64 = h.high64; + *low64 = h.low64; +} + +void Hash2x64(const char* data, size_t n, uint64_t seed, uint64_t* high64, + uint64_t* low64) { + auto h = XXH3_128bits_withSeed(data, n, seed); + *high64 = h.high64; + *low64 = h.low64; +} + +namespace { + +inline uint64_t XXH3_avalanche(uint64_t h64) { + h64 ^= h64 >> 37; + h64 *= 0x165667919E3779F9U; + h64 ^= h64 >> 32; + return h64; +} + +inline uint64_t XXH3_unavalanche(uint64_t h64) { + h64 ^= h64 >> 32; + h64 *= 0x8da8ee41d6df849U; // inverse of 0x165667919E3779F9U + h64 ^= h64 >> 37; + return h64; +} + +} // namespace + +void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed, + uint64_t* out_high64, uint64_t* out_low64) { + // Adapted from XXH3_len_9to16_128b + const uint64_t bitflipl = /*secret part*/ 0x59973f0033362349U - seed; + const uint64_t bitfliph = /*secret part*/ 0xc202797692d63d58U + seed; + Unsigned128 tmp128 = + Multiply64to128(in_low64 ^ in_high64 ^ bitflipl, 0x9E3779B185EBCA87U); + uint64_t lo = Lower64of128(tmp128); + uint64_t hi = Upper64of128(tmp128); + lo += 0x3c0000000000000U; // (len - 1) << 54 + in_high64 ^= bitfliph; + hi += in_high64 + (Lower32of64(in_high64) * uint64_t{0x85EBCA76}); + lo ^= EndianSwapValue(hi); + tmp128 = Multiply64to128(lo, 0xC2B2AE3D27D4EB4FU); + lo = Lower64of128(tmp128); + hi = Upper64of128(tmp128) + (hi * 0xC2B2AE3D27D4EB4FU); + *out_low64 = XXH3_avalanche(lo); + *out_high64 = XXH3_avalanche(hi); +} + +void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed, + uint64_t* out_high64, uint64_t* out_low64) { + // Inverted above (also consulting XXH3_len_9to16_128b) + const uint64_t bitflipl = /*secret part*/ 0x59973f0033362349U - seed; + const uint64_t bitfliph = /*secret part*/ 0xc202797692d63d58U + seed; + uint64_t lo = XXH3_unavalanche(in_low64); + uint64_t hi = XXH3_unavalanche(in_high64); + lo *= 0xba79078168d4baf; // inverse of 0xC2B2AE3D27D4EB4FU + hi -= Upper64of128(Multiply64to128(lo, 0xC2B2AE3D27D4EB4FU)); + hi *= 0xba79078168d4baf; // inverse of 0xC2B2AE3D27D4EB4FU + lo ^= EndianSwapValue(hi); + lo -= 0x3c0000000000000U; + lo *= 0x887493432badb37U; // inverse of 0x9E3779B185EBCA87U + hi -= Upper64of128(Multiply64to128(lo, 0x9E3779B185EBCA87U)); + uint32_t tmp32 = Lower32of64(hi) * 0xb6c92f47; // inverse of 0x85EBCA77 + hi -= tmp32; + hi = (hi & 0xFFFFFFFF00000000U) - + ((tmp32 * uint64_t{0x85EBCA76}) & 0xFFFFFFFF00000000U) + tmp32; + hi ^= bitfliph; + lo ^= hi ^ bitflipl; + *out_high64 = hi; + *out_low64 = lo; +} + +void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, + uint64_t* out_high64, uint64_t* out_low64) { + BijectiveHash2x64(in_high64, in_low64, /*seed*/ 0, out_high64, out_low64); +} + +void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, + uint64_t* out_high64, uint64_t* out_low64) { + BijectiveUnhash2x64(in_high64, in_low64, /*seed*/ 0, out_high64, out_low64); +} +} // namespace ROCKSDB_NAMESPACE |