diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
commit | 43a123c1ae6613b3efeed291fa552ecd909d3acf (patch) | |
tree | fd92518b7024bc74031f78a1cf9e454b65e73665 /src/hash/maphash/maphash.go | |
parent | Initial commit. (diff) | |
download | golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.tar.xz golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.zip |
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/hash/maphash/maphash.go')
-rw-r--r-- | src/hash/maphash/maphash.go | 308 |
1 files changed, 308 insertions, 0 deletions
diff --git a/src/hash/maphash/maphash.go b/src/hash/maphash/maphash.go new file mode 100644 index 0000000..690068a --- /dev/null +++ b/src/hash/maphash/maphash.go @@ -0,0 +1,308 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package maphash provides hash functions on byte sequences. +// These hash functions are intended to be used to implement hash tables or +// other data structures that need to map arbitrary strings or byte +// sequences to a uniform distribution on unsigned 64-bit integers. +// Each different instance of a hash table or data structure should use its own Seed. +// +// The hash functions are not cryptographically secure. +// (See crypto/sha256 and crypto/sha512 for cryptographic use.) +package maphash + +import ( + "unsafe" +) + +// A Seed is a random value that selects the specific hash function +// computed by a Hash. If two Hashes use the same Seeds, they +// will compute the same hash values for any given input. +// If two Hashes use different Seeds, they are very likely to compute +// distinct hash values for any given input. +// +// A Seed must be initialized by calling MakeSeed. +// The zero seed is uninitialized and not valid for use with Hash's SetSeed method. +// +// Each Seed value is local to a single process and cannot be serialized +// or otherwise recreated in a different process. +type Seed struct { + s uint64 +} + +// Bytes returns the hash of b with the given seed. +// +// Bytes is equivalent to, but more convenient and efficient than: +// +// var h Hash +// h.SetSeed(seed) +// h.Write(b) +// return h.Sum64() +func Bytes(seed Seed, b []byte) uint64 { + state := seed.s + if state == 0 { + panic("maphash: use of uninitialized Seed") + } + if len(b) == 0 { + return rthash(nil, 0, state) // avoid &b[0] index panic below + } + if len(b) > bufSize { + b = b[:len(b):len(b)] // merge len and cap calculations when reslicing + for len(b) > bufSize { + state = rthash(&b[0], bufSize, state) + b = b[bufSize:] + } + } + return rthash(&b[0], len(b), state) +} + +// String returns the hash of s with the given seed. +// +// String is equivalent to, but more convenient and efficient than: +// +// var h Hash +// h.SetSeed(seed) +// h.WriteString(s) +// return h.Sum64() +func String(seed Seed, s string) uint64 { + state := seed.s + if state == 0 { + panic("maphash: use of uninitialized Seed") + } + for len(s) > bufSize { + p := (*byte)(unsafe.StringData(s)) + state = rthash(p, bufSize, state) + s = s[bufSize:] + } + p := (*byte)(unsafe.StringData(s)) + return rthash(p, len(s), state) +} + +// A Hash computes a seeded hash of a byte sequence. +// +// The zero Hash is a valid Hash ready to use. +// A zero Hash chooses a random seed for itself during +// the first call to a Reset, Write, Seed, or Sum64 method. +// For control over the seed, use SetSeed. +// +// The computed hash values depend only on the initial seed and +// the sequence of bytes provided to the Hash object, not on the way +// in which the bytes are provided. For example, the three sequences +// +// h.Write([]byte{'f','o','o'}) +// h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') +// h.WriteString("foo") +// +// all have the same effect. +// +// Hashes are intended to be collision-resistant, even for situations +// where an adversary controls the byte sequences being hashed. +// +// A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. +// If multiple goroutines must compute the same seeded hash, +// each can declare its own Hash and call SetSeed with a common Seed. +type Hash struct { + _ [0]func() // not comparable + seed Seed // initial seed used for this hash + state Seed // current hash of all flushed bytes + buf [bufSize]byte // unflushed byte buffer + n int // number of unflushed bytes +} + +// bufSize is the size of the Hash write buffer. +// The buffer ensures that writes depend only on the sequence of bytes, +// not the sequence of WriteByte/Write/WriteString calls, +// by always calling rthash with a full buffer (except for the tail). +const bufSize = 128 + +// initSeed seeds the hash if necessary. +// initSeed is called lazily before any operation that actually uses h.seed/h.state. +// Note that this does not include Write/WriteByte/WriteString in the case +// where they only add to h.buf. (If they write too much, they call h.flush, +// which does call h.initSeed.) +func (h *Hash) initSeed() { + if h.seed.s == 0 { + seed := MakeSeed() + h.seed = seed + h.state = seed + } +} + +// WriteByte adds b to the sequence of bytes hashed by h. +// It never fails; the error result is for implementing io.ByteWriter. +func (h *Hash) WriteByte(b byte) error { + if h.n == len(h.buf) { + h.flush() + } + h.buf[h.n] = b + h.n++ + return nil +} + +// Write adds b to the sequence of bytes hashed by h. +// It always writes all of b and never fails; the count and error result are for implementing io.Writer. +func (h *Hash) Write(b []byte) (int, error) { + size := len(b) + // Deal with bytes left over in h.buf. + // h.n <= bufSize is always true. + // Checking it is ~free and it lets the compiler eliminate a bounds check. + if h.n > 0 && h.n <= bufSize { + k := copy(h.buf[h.n:], b) + h.n += k + if h.n < bufSize { + // Copied the entirety of b to h.buf. + return size, nil + } + b = b[k:] + h.flush() + // No need to set h.n = 0 here; it happens just before exit. + } + // Process as many full buffers as possible, without copying, and calling initSeed only once. + if len(b) > bufSize { + h.initSeed() + for len(b) > bufSize { + h.state.s = rthash(&b[0], bufSize, h.state.s) + b = b[bufSize:] + } + } + // Copy the tail. + copy(h.buf[:], b) + h.n = len(b) + return size, nil +} + +// WriteString adds the bytes of s to the sequence of bytes hashed by h. +// It always writes all of s and never fails; the count and error result are for implementing io.StringWriter. +func (h *Hash) WriteString(s string) (int, error) { + // WriteString mirrors Write. See Write for comments. + size := len(s) + if h.n > 0 && h.n <= bufSize { + k := copy(h.buf[h.n:], s) + h.n += k + if h.n < bufSize { + return size, nil + } + s = s[k:] + h.flush() + } + if len(s) > bufSize { + h.initSeed() + for len(s) > bufSize { + ptr := (*byte)(unsafe.StringData(s)) + h.state.s = rthash(ptr, bufSize, h.state.s) + s = s[bufSize:] + } + } + copy(h.buf[:], s) + h.n = len(s) + return size, nil +} + +// Seed returns h's seed value. +func (h *Hash) Seed() Seed { + h.initSeed() + return h.seed +} + +// SetSeed sets h to use seed, which must have been returned by MakeSeed +// or by another Hash's Seed method. +// Two Hash objects with the same seed behave identically. +// Two Hash objects with different seeds will very likely behave differently. +// Any bytes added to h before this call will be discarded. +func (h *Hash) SetSeed(seed Seed) { + if seed.s == 0 { + panic("maphash: use of uninitialized Seed") + } + h.seed = seed + h.state = seed + h.n = 0 +} + +// Reset discards all bytes added to h. +// (The seed remains the same.) +func (h *Hash) Reset() { + h.initSeed() + h.state = h.seed + h.n = 0 +} + +// precondition: buffer is full. +func (h *Hash) flush() { + if h.n != len(h.buf) { + panic("maphash: flush of partially full buffer") + } + h.initSeed() + h.state.s = rthash(&h.buf[0], h.n, h.state.s) + h.n = 0 +} + +// Sum64 returns h's current 64-bit value, which depends on +// h's seed and the sequence of bytes added to h since the +// last call to Reset or SetSeed. +// +// All bits of the Sum64 result are close to uniformly and +// independently distributed, so it can be safely reduced +// by using bit masking, shifting, or modular arithmetic. +func (h *Hash) Sum64() uint64 { + h.initSeed() + return rthash(&h.buf[0], h.n, h.state.s) +} + +// MakeSeed returns a new random seed. +func MakeSeed() Seed { + var s uint64 + for { + s = runtime_fastrand64() + // We use seed 0 to indicate an uninitialized seed/hash, + // so keep trying until we get a non-zero seed. + if s != 0 { + break + } + } + return Seed{s: s} +} + +//go:linkname runtime_fastrand64 runtime.fastrand64 +func runtime_fastrand64() uint64 + +func rthash(ptr *byte, len int, seed uint64) uint64 { + if len == 0 { + return seed + } + // The runtime hasher only works on uintptr. For 64-bit + // architectures, we use the hasher directly. Otherwise, + // we use two parallel hashers on the lower and upper 32 bits. + if unsafe.Sizeof(uintptr(0)) == 8 { + return uint64(runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len))) + } + lo := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len)) + hi := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed>>32), uintptr(len)) + return uint64(hi)<<32 | uint64(lo) +} + +//go:linkname runtime_memhash runtime.memhash +//go:noescape +func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr + +// Sum appends the hash's current 64-bit value to b. +// It exists for implementing hash.Hash. +// For direct calls, it is more efficient to use Sum64. +func (h *Hash) Sum(b []byte) []byte { + x := h.Sum64() + return append(b, + byte(x>>0), + byte(x>>8), + byte(x>>16), + byte(x>>24), + byte(x>>32), + byte(x>>40), + byte(x>>48), + byte(x>>56)) +} + +// Size returns h's hash value size, 8 bytes. +func (h *Hash) Size() int { return 8 } + +// BlockSize returns h's block size. +func (h *Hash) BlockSize() int { return len(h.buf) } |