diff options
Diffstat (limited to 'src/internal/intern')
-rw-r--r-- | src/internal/intern/intern.go | 181 | ||||
-rw-r--r-- | src/internal/intern/intern_test.go | 199 |
2 files changed, 380 insertions, 0 deletions
diff --git a/src/internal/intern/intern.go b/src/internal/intern/intern.go new file mode 100644 index 0000000..0e6852f --- /dev/null +++ b/src/internal/intern/intern.go @@ -0,0 +1,181 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package intern lets you make smaller comparable values by boxing +// a larger comparable value (such as a 16 byte string header) down +// into a globally unique 8 byte pointer. +// +// The globally unique pointers are garbage collected with weak +// references and finalizers. This package hides that. +package intern + +import ( + "internal/godebug" + "runtime" + "sync" + "unsafe" +) + +// A Value pointer is the handle to an underlying comparable value. +// See func Get for how Value pointers may be used. +type Value struct { + _ [0]func() // prevent people from accidentally using value type as comparable + cmpVal any + // resurrected is guarded by mu (for all instances of Value). + // It is set true whenever v is synthesized from a uintptr. + resurrected bool +} + +// Get returns the comparable value passed to the Get func +// that returned v. +func (v *Value) Get() any { return v.cmpVal } + +// key is a key in our global value map. +// It contains type-specialized fields to avoid allocations +// when converting common types to empty interfaces. +type key struct { + s string + cmpVal any + // isString reports whether key contains a string. + // Without it, the zero value of key is ambiguous. + isString bool +} + +// keyFor returns a key to use with cmpVal. +func keyFor(cmpVal any) key { + if s, ok := cmpVal.(string); ok { + return key{s: s, isString: true} + } + return key{cmpVal: cmpVal} +} + +// Value returns a *Value built from k. +func (k key) Value() *Value { + if k.isString { + return &Value{cmpVal: k.s} + } + return &Value{cmpVal: k.cmpVal} +} + +var ( + // mu guards valMap, a weakref map of *Value by underlying value. + // It also guards the resurrected field of all *Values. + mu sync.Mutex + valMap = map[key]uintptr{} // to uintptr(*Value) + valSafe = safeMap() // non-nil in safe+leaky mode +) + +var intern = godebug.New("intern") + +// safeMap returns a non-nil map if we're in safe-but-leaky mode, +// as controlled by GODEBUG=intern=leaky +func safeMap() map[key]*Value { + if intern.Value() == "leaky" { + return map[key]*Value{} + } + return nil +} + +// Get returns a pointer representing the comparable value cmpVal. +// +// The returned pointer will be the same for Get(v) and Get(v2) +// if and only if v == v2, and can be used as a map key. +func Get(cmpVal any) *Value { + return get(keyFor(cmpVal)) +} + +// GetByString is identical to Get, except that it is specialized for strings. +// This avoids an allocation from putting a string into an interface{} +// to pass as an argument to Get. +func GetByString(s string) *Value { + return get(key{s: s, isString: true}) +} + +// We play unsafe games that violate Go's rules (and assume a non-moving +// collector). So we quiet Go here. +// See the comment below Get for more implementation details. +// +//go:nocheckptr +func get(k key) *Value { + mu.Lock() + defer mu.Unlock() + + var v *Value + if valSafe != nil { + v = valSafe[k] + } else if addr, ok := valMap[k]; ok { + v = (*Value)(unsafe.Pointer(addr)) + v.resurrected = true + } + if v != nil { + return v + } + v = k.Value() + if valSafe != nil { + valSafe[k] = v + } else { + // SetFinalizer before uintptr conversion (theoretical concern; + // see https://github.com/go4org/intern/issues/13) + runtime.SetFinalizer(v, finalize) + valMap[k] = uintptr(unsafe.Pointer(v)) + } + return v +} + +func finalize(v *Value) { + mu.Lock() + defer mu.Unlock() + if v.resurrected { + // We lost the race. Somebody resurrected it while we + // were about to finalize it. Try again next round. + v.resurrected = false + runtime.SetFinalizer(v, finalize) + return + } + delete(valMap, keyFor(v.cmpVal)) +} + +// Interning is simple if you don't require that unused values be +// garbage collectable. But we do require that; we don't want to be +// DOS vector. We do this by using a uintptr to hide the pointer from +// the garbage collector, and using a finalizer to eliminate the +// pointer when no other code is using it. +// +// The obvious implementation of this is to use a +// map[interface{}]uintptr-of-*interface{}, and set up a finalizer to +// delete from the map. Unfortunately, this is racy. Because pointers +// are being created in violation of Go's unsafety rules, it's +// possible to create a pointer to a value concurrently with the GC +// concluding that the value can be collected. There are other races +// that break the equality invariant as well, but the use-after-free +// will cause a runtime crash. +// +// To make this work, the finalizer needs to know that no references +// have been unsafely created since the finalizer was set up. To do +// this, values carry a "resurrected" sentinel, which gets set +// whenever a pointer is unsafely created. If the finalizer encounters +// the sentinel, it clears the sentinel and delays collection for one +// additional GC cycle, by re-installing itself as finalizer. This +// ensures that the unsafely created pointer is visible to the GC, and +// will correctly prevent collection. +// +// This technique does mean that interned values that get reused take +// at least 3 GC cycles to fully collect (1 to clear the sentinel, 1 +// to clean up the unsafe map, 1 to be actually deleted). +// +// @ianlancetaylor commented in +// https://github.com/golang/go/issues/41303#issuecomment-717401656 +// that it is possible to implement weak references in terms of +// finalizers without unsafe. Unfortunately, the approach he outlined +// does not work here, for two reasons. First, there is no way to +// construct a strong pointer out of a weak pointer; our map stores +// weak pointers, but we must return strong pointers to callers. +// Second, and more fundamentally, we must return not just _a_ strong +// pointer to callers, but _the same_ strong pointer to callers. In +// order to return _the same_ strong pointer to callers, we must track +// it, which is exactly what we cannot do with strong pointers. +// +// See https://github.com/inetaf/netaddr/issues/53 for more +// discussion, and https://github.com/go4org/intern/issues/2 for an +// illustration of the subtleties at play. diff --git a/src/internal/intern/intern_test.go b/src/internal/intern/intern_test.go new file mode 100644 index 0000000..d1e409e --- /dev/null +++ b/src/internal/intern/intern_test.go @@ -0,0 +1,199 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package intern + +import ( + "fmt" + "runtime" + "testing" +) + +func TestBasics(t *testing.T) { + clearMap() + foo := Get("foo") + bar := Get("bar") + empty := Get("") + nilEface := Get(nil) + i := Get(0x7777777) + foo2 := Get("foo") + bar2 := Get("bar") + empty2 := Get("") + nilEface2 := Get(nil) + i2 := Get(0x7777777) + foo3 := GetByString("foo") + empty3 := GetByString("") + + if foo.Get() != foo2.Get() { + t.Error("foo/foo2 values differ") + } + if foo.Get() != foo3.Get() { + t.Error("foo/foo3 values differ") + } + if foo.Get() != "foo" { + t.Error("foo.Get not foo") + } + if foo != foo2 { + t.Error("foo/foo2 pointers differ") + } + if foo != foo3 { + t.Error("foo/foo3 pointers differ") + } + + if bar.Get() != bar2.Get() { + t.Error("bar values differ") + } + if bar.Get() != "bar" { + t.Error("bar.Get not bar") + } + if bar != bar2 { + t.Error("bar pointers differ") + } + + if i.Get() != i.Get() { + t.Error("i values differ") + } + if i.Get() != 0x7777777 { + t.Error("i.Get not 0x7777777") + } + if i != i2 { + t.Error("i pointers differ") + } + + if empty.Get() != empty2.Get() { + t.Error("empty/empty2 values differ") + } + if empty.Get() != empty.Get() { + t.Error("empty/empty3 values differ") + } + if empty.Get() != "" { + t.Error("empty.Get not empty string") + } + if empty != empty2 { + t.Error("empty/empty2 pointers differ") + } + if empty != empty3 { + t.Error("empty/empty3 pointers differ") + } + + if nilEface.Get() != nilEface2.Get() { + t.Error("nilEface values differ") + } + if nilEface.Get() != nil { + t.Error("nilEface.Get not nil") + } + if nilEface != nilEface2 { + t.Error("nilEface pointers differ") + } + + if n := mapLen(); n != 5 { + t.Errorf("map len = %d; want 4", n) + } + + wantEmpty(t) +} + +func wantEmpty(t testing.TB) { + t.Helper() + const gcTries = 5000 + for try := 0; try < gcTries; try++ { + runtime.GC() + n := mapLen() + if n == 0 { + break + } + if try == gcTries-1 { + t.Errorf("map len = %d after (%d GC tries); want 0, contents: %v", n, gcTries, mapKeys()) + } + } +} + +func TestStress(t *testing.T) { + iters := 10000 + if testing.Short() { + iters = 1000 + } + var sink []byte + for i := 0; i < iters; i++ { + _ = Get("foo") + sink = make([]byte, 1<<20) + } + _ = sink +} + +func BenchmarkStress(b *testing.B) { + done := make(chan struct{}) + defer close(done) + go func() { + for { + select { + case <-done: + return + default: + } + runtime.GC() + } + }() + + clearMap() + v1 := Get("foo") + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + v2 := Get("foo") + if v1 != v2 { + b.Fatal("wrong value") + } + // And also a key we don't retain: + _ = Get("bar") + } + }) + runtime.GC() + wantEmpty(b) +} + +func mapLen() int { + mu.Lock() + defer mu.Unlock() + return len(valMap) +} + +func mapKeys() (keys []string) { + mu.Lock() + defer mu.Unlock() + for k := range valMap { + keys = append(keys, fmt.Sprint(k)) + } + return keys +} + +func clearMap() { + mu.Lock() + defer mu.Unlock() + for k := range valMap { + delete(valMap, k) + } +} + +var ( + globalString = "not a constant" + sink string +) + +func TestGetByStringAllocs(t *testing.T) { + allocs := int(testing.AllocsPerRun(100, func() { + GetByString(globalString) + })) + if allocs != 0 { + t.Errorf("GetString allocated %d objects, want 0", allocs) + } +} + +func BenchmarkGetByString(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + v := GetByString(globalString) + sink = v.Get().(string) + } +} |