summaryrefslogtreecommitdiffstats
path: root/src/hash/maphash/maphash.go
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/hash/maphash/maphash.go263
1 files changed, 263 insertions, 0 deletions
diff --git a/src/hash/maphash/maphash.go b/src/hash/maphash/maphash.go
new file mode 100644
index 0000000..d022d74
--- /dev/null
+++ b/src/hash/maphash/maphash.go
@@ -0,0 +1,263 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package maphash provides hash functions on byte sequences.
+// These hash functions are intended to be used to implement hash tables or
+// other data structures that need to map arbitrary strings or byte
+// sequences to a uniform distribution on unsigned 64-bit integers.
+// Each different instance of a hash table or data structure should use its own Seed.
+//
+// The hash functions are not cryptographically secure.
+// (See crypto/sha256 and crypto/sha512 for cryptographic use.)
+//
+package maphash
+
+import (
+ "internal/unsafeheader"
+ "unsafe"
+)
+
+// A Seed is a random value that selects the specific hash function
+// computed by a Hash. If two Hashes use the same Seeds, they
+// will compute the same hash values for any given input.
+// If two Hashes use different Seeds, they are very likely to compute
+// distinct hash values for any given input.
+//
+// A Seed must be initialized by calling MakeSeed.
+// The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
+//
+// Each Seed value is local to a single process and cannot be serialized
+// or otherwise recreated in a different process.
+type Seed struct {
+ s uint64
+}
+
+// A Hash computes a seeded hash of a byte sequence.
+//
+// The zero Hash is a valid Hash ready to use.
+// A zero Hash chooses a random seed for itself during
+// the first call to a Reset, Write, Seed, or Sum64 method.
+// For control over the seed, use SetSeed.
+//
+// The computed hash values depend only on the initial seed and
+// the sequence of bytes provided to the Hash object, not on the way
+// in which the bytes are provided. For example, the three sequences
+//
+// h.Write([]byte{'f','o','o'})
+// h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
+// h.WriteString("foo")
+//
+// all have the same effect.
+//
+// Hashes are intended to be collision-resistant, even for situations
+// where an adversary controls the byte sequences being hashed.
+//
+// A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
+// If multiple goroutines must compute the same seeded hash,
+// each can declare its own Hash and call SetSeed with a common Seed.
+type Hash struct {
+ _ [0]func() // not comparable
+ seed Seed // initial seed used for this hash
+ state Seed // current hash of all flushed bytes
+ buf [bufSize]byte // unflushed byte buffer
+ n int // number of unflushed bytes
+}
+
+// bufSize is the size of the Hash write buffer.
+// The buffer ensures that writes depend only on the sequence of bytes,
+// not the sequence of WriteByte/Write/WriteString calls,
+// by always calling rthash with a full buffer (except for the tail).
+const bufSize = 128
+
+// initSeed seeds the hash if necessary.
+// initSeed is called lazily before any operation that actually uses h.seed/h.state.
+// Note that this does not include Write/WriteByte/WriteString in the case
+// where they only add to h.buf. (If they write too much, they call h.flush,
+// which does call h.initSeed.)
+func (h *Hash) initSeed() {
+ if h.seed.s == 0 {
+ seed := MakeSeed()
+ h.seed = seed
+ h.state = seed
+ }
+}
+
+// WriteByte adds b to the sequence of bytes hashed by h.
+// It never fails; the error result is for implementing io.ByteWriter.
+func (h *Hash) WriteByte(b byte) error {
+ if h.n == len(h.buf) {
+ h.flush()
+ }
+ h.buf[h.n] = b
+ h.n++
+ return nil
+}
+
+// Write adds b to the sequence of bytes hashed by h.
+// It always writes all of b and never fails; the count and error result are for implementing io.Writer.
+func (h *Hash) Write(b []byte) (int, error) {
+ size := len(b)
+ // Deal with bytes left over in h.buf.
+ // h.n <= bufSize is always true.
+ // Checking it is ~free and it lets the compiler eliminate a bounds check.
+ if h.n > 0 && h.n <= bufSize {
+ k := copy(h.buf[h.n:], b)
+ h.n += k
+ if h.n < bufSize {
+ // Copied the entirety of b to h.buf.
+ return size, nil
+ }
+ b = b[k:]
+ h.flush()
+ // No need to set h.n = 0 here; it happens just before exit.
+ }
+ // Process as many full buffers as possible, without copying, and calling initSeed only once.
+ if len(b) > bufSize {
+ h.initSeed()
+ for len(b) > bufSize {
+ h.state.s = rthash(&b[0], bufSize, h.state.s)
+ b = b[bufSize:]
+ }
+ }
+ // Copy the tail.
+ copy(h.buf[:], b)
+ h.n = len(b)
+ return size, nil
+}
+
+// WriteString adds the bytes of s to the sequence of bytes hashed by h.
+// It always writes all of s and never fails; the count and error result are for implementing io.StringWriter.
+func (h *Hash) WriteString(s string) (int, error) {
+ // WriteString mirrors Write. See Write for comments.
+ size := len(s)
+ if h.n > 0 && h.n <= bufSize {
+ k := copy(h.buf[h.n:], s)
+ h.n += k
+ if h.n < bufSize {
+ return size, nil
+ }
+ s = s[k:]
+ h.flush()
+ }
+ if len(s) > bufSize {
+ h.initSeed()
+ for len(s) > bufSize {
+ ptr := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data)
+ h.state.s = rthash(ptr, bufSize, h.state.s)
+ s = s[bufSize:]
+ }
+ }
+ copy(h.buf[:], s)
+ h.n = len(s)
+ return size, nil
+}
+
+// Seed returns h's seed value.
+func (h *Hash) Seed() Seed {
+ h.initSeed()
+ return h.seed
+}
+
+// SetSeed sets h to use seed, which must have been returned by MakeSeed
+// or by another Hash's Seed method.
+// Two Hash objects with the same seed behave identically.
+// Two Hash objects with different seeds will very likely behave differently.
+// Any bytes added to h before this call will be discarded.
+func (h *Hash) SetSeed(seed Seed) {
+ if seed.s == 0 {
+ panic("maphash: use of uninitialized Seed")
+ }
+ h.seed = seed
+ h.state = seed
+ h.n = 0
+}
+
+// Reset discards all bytes added to h.
+// (The seed remains the same.)
+func (h *Hash) Reset() {
+ h.initSeed()
+ h.state = h.seed
+ h.n = 0
+}
+
+// precondition: buffer is full.
+func (h *Hash) flush() {
+ if h.n != len(h.buf) {
+ panic("maphash: flush of partially full buffer")
+ }
+ h.initSeed()
+ h.state.s = rthash(&h.buf[0], h.n, h.state.s)
+ h.n = 0
+}
+
+// Sum64 returns h's current 64-bit value, which depends on
+// h's seed and the sequence of bytes added to h since the
+// last call to Reset or SetSeed.
+//
+// All bits of the Sum64 result are close to uniformly and
+// independently distributed, so it can be safely reduced
+// by using bit masking, shifting, or modular arithmetic.
+func (h *Hash) Sum64() uint64 {
+ h.initSeed()
+ return rthash(&h.buf[0], h.n, h.state.s)
+}
+
+// MakeSeed returns a new random seed.
+func MakeSeed() Seed {
+ var s1, s2 uint64
+ for {
+ s1 = uint64(runtime_fastrand())
+ s2 = uint64(runtime_fastrand())
+ // We use seed 0 to indicate an uninitialized seed/hash,
+ // so keep trying until we get a non-zero seed.
+ if s1|s2 != 0 {
+ break
+ }
+ }
+ return Seed{s: s1<<32 + s2}
+}
+
+//go:linkname runtime_fastrand runtime.fastrand
+func runtime_fastrand() uint32
+
+func rthash(ptr *byte, len int, seed uint64) uint64 {
+ if len == 0 {
+ return seed
+ }
+ // The runtime hasher only works on uintptr. For 64-bit
+ // architectures, we use the hasher directly. Otherwise,
+ // we use two parallel hashers on the lower and upper 32 bits.
+ if unsafe.Sizeof(uintptr(0)) == 8 {
+ return uint64(runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len)))
+ }
+ lo := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len))
+ hi := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed>>32), uintptr(len))
+ return uint64(hi)<<32 | uint64(lo)
+}
+
+//go:linkname runtime_memhash runtime.memhash
+//go:noescape
+func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr
+
+// Sum appends the hash's current 64-bit value to b.
+// It exists for implementing hash.Hash.
+// For direct calls, it is more efficient to use Sum64.
+func (h *Hash) Sum(b []byte) []byte {
+ x := h.Sum64()
+ return append(b,
+ byte(x>>0),
+ byte(x>>8),
+ byte(x>>16),
+ byte(x>>24),
+ byte(x>>32),
+ byte(x>>40),
+ byte(x>>48),
+ byte(x>>56))
+}
+
+// Size returns h's hash value size, 8 bytes.
+func (h *Hash) Size() int { return 8 }
+
+// BlockSize returns h's block size.
+func (h *Hash) BlockSize() int { return len(h.buf) }