summaryrefslogtreecommitdiffstats
path: root/src/internal/intern/intern.go
blob: 2f97c2e6694286ef120f2c1067f16395018d5589 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package intern lets you make smaller comparable values by boxing
// a larger comparable value (such as a 16 byte string header) down
// into a globally unique 8 byte pointer.
//
// The globally unique pointers are garbage collected with weak
// references and finalizers. This package hides that.
package intern

import (
	"internal/godebug"
	"runtime"
	"sync"
	"unsafe"
)

// A Value pointer is the handle to an underlying comparable value.
// See func Get for how Value pointers may be used.
type Value struct {
	_      [0]func() // prevent people from accidentally using value type as comparable
	cmpVal any
	// resurrected is guarded by mu (for all instances of Value).
	// It is set true whenever v is synthesized from a uintptr.
	resurrected bool
}

// Get returns the comparable value passed to the Get func
// that returned v.
func (v *Value) Get() any { return v.cmpVal }

// key is a key in our global value map.
// It contains type-specialized fields to avoid allocations
// when converting common types to empty interfaces.
type key struct {
	s      string
	cmpVal any
	// isString reports whether key contains a string.
	// Without it, the zero value of key is ambiguous.
	isString bool
}

// keyFor returns a key to use with cmpVal.
func keyFor(cmpVal any) key {
	if s, ok := cmpVal.(string); ok {
		return key{s: s, isString: true}
	}
	return key{cmpVal: cmpVal}
}

// Value returns a *Value built from k.
func (k key) Value() *Value {
	if k.isString {
		return &Value{cmpVal: k.s}
	}
	return &Value{cmpVal: k.cmpVal}
}

var (
	// mu guards valMap, a weakref map of *Value by underlying value.
	// It also guards the resurrected field of all *Values.
	mu      sync.Mutex
	valMap  = map[key]uintptr{} // to uintptr(*Value)
	valSafe = safeMap()         // non-nil in safe+leaky mode
)

var intern = godebug.New("#intern")

// safeMap returns a non-nil map if we're in safe-but-leaky mode,
// as controlled by GODEBUG=intern=leaky
func safeMap() map[key]*Value {
	if intern.Value() == "leaky" {
		return map[key]*Value{}
	}
	return nil
}

// Get returns a pointer representing the comparable value cmpVal.
//
// The returned pointer will be the same for Get(v) and Get(v2)
// if and only if v == v2, and can be used as a map key.
func Get(cmpVal any) *Value {
	return get(keyFor(cmpVal))
}

// GetByString is identical to Get, except that it is specialized for strings.
// This avoids an allocation from putting a string into an interface{}
// to pass as an argument to Get.
func GetByString(s string) *Value {
	return get(key{s: s, isString: true})
}

// We play unsafe games that violate Go's rules (and assume a non-moving
// collector). So we quiet Go here.
// See the comment below Get for more implementation details.
//
//go:nocheckptr
func get(k key) *Value {
	mu.Lock()
	defer mu.Unlock()

	var v *Value
	if valSafe != nil {
		v = valSafe[k]
	} else if addr, ok := valMap[k]; ok {
		v = (*Value)(unsafe.Pointer(addr))
		v.resurrected = true
	}
	if v != nil {
		return v
	}
	v = k.Value()
	if valSafe != nil {
		valSafe[k] = v
	} else {
		// SetFinalizer before uintptr conversion (theoretical concern;
		// see https://github.com/go4org/intern/issues/13)
		runtime.SetFinalizer(v, finalize)
		valMap[k] = uintptr(unsafe.Pointer(v))
	}
	return v
}

func finalize(v *Value) {
	mu.Lock()
	defer mu.Unlock()
	if v.resurrected {
		// We lost the race. Somebody resurrected it while we
		// were about to finalize it. Try again next round.
		v.resurrected = false
		runtime.SetFinalizer(v, finalize)
		return
	}
	delete(valMap, keyFor(v.cmpVal))
}

// Interning is simple if you don't require that unused values be
// garbage collectable. But we do require that; we don't want to be
// DOS vector. We do this by using a uintptr to hide the pointer from
// the garbage collector, and using a finalizer to eliminate the
// pointer when no other code is using it.
//
// The obvious implementation of this is to use a
// map[interface{}]uintptr-of-*interface{}, and set up a finalizer to
// delete from the map. Unfortunately, this is racy. Because pointers
// are being created in violation of Go's unsafety rules, it's
// possible to create a pointer to a value concurrently with the GC
// concluding that the value can be collected. There are other races
// that break the equality invariant as well, but the use-after-free
// will cause a runtime crash.
//
// To make this work, the finalizer needs to know that no references
// have been unsafely created since the finalizer was set up. To do
// this, values carry a "resurrected" sentinel, which gets set
// whenever a pointer is unsafely created. If the finalizer encounters
// the sentinel, it clears the sentinel and delays collection for one
// additional GC cycle, by re-installing itself as finalizer. This
// ensures that the unsafely created pointer is visible to the GC, and
// will correctly prevent collection.
//
// This technique does mean that interned values that get reused take
// at least 3 GC cycles to fully collect (1 to clear the sentinel, 1
// to clean up the unsafe map, 1 to be actually deleted).
//
// @ianlancetaylor commented in
// https://github.com/golang/go/issues/41303#issuecomment-717401656
// that it is possible to implement weak references in terms of
// finalizers without unsafe. Unfortunately, the approach he outlined
// does not work here, for two reasons. First, there is no way to
// construct a strong pointer out of a weak pointer; our map stores
// weak pointers, but we must return strong pointers to callers.
// Second, and more fundamentally, we must return not just _a_ strong
// pointer to callers, but _the same_ strong pointer to callers. In
// order to return _the same_ strong pointer to callers, we must track
// it, which is exactly what we cannot do with strong pointers.
//
// See https://github.com/inetaf/netaddr/issues/53 for more
// discussion, and https://github.com/go4org/intern/issues/2 for an
// illustration of the subtleties at play.