1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
|
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
// Generate tables for small malloc size classes.
//
// See malloc.go for overview.
//
// The size classes are chosen so that rounding an allocation
// request up to the next size class wastes at most 12.5% (1.125x).
//
// Each size class has its own page count that gets allocated
// and chopped up when new objects of the size class are needed.
// That page count is chosen so that chopping up the run of
// pages into objects of the given size wastes at most 12.5% (1.125x)
// of the memory. It is not necessary that the cutoff here be
// the same as above.
//
// The two sources of waste multiply, so the worst possible case
// for the above constraints would be that allocations of some
// size might have a 26.6% (1.266x) overhead.
// In practice, only one of the wastes comes into play for a
// given size (sizes < 512 waste mainly on the round-up,
// sizes > 512 waste mainly on the page chopping).
// For really small sizes, alignment constraints force the
// overhead higher.
package main
import (
"bytes"
"flag"
"fmt"
"go/format"
"io"
"log"
"math"
"math/bits"
"os"
)
// Generate msize.go
var stdout = flag.Bool("stdout", false, "write to stdout instead of sizeclasses.go")
func main() {
flag.Parse()
var b bytes.Buffer
fmt.Fprintln(&b, "// Code generated by mksizeclasses.go; DO NOT EDIT.")
fmt.Fprintln(&b, "//go:generate go run mksizeclasses.go")
fmt.Fprintln(&b)
fmt.Fprintln(&b, "package runtime")
classes := makeClasses()
printComment(&b, classes)
printClasses(&b, classes)
out, err := format.Source(b.Bytes())
if err != nil {
log.Fatal(err)
}
if *stdout {
_, err = os.Stdout.Write(out)
} else {
err = os.WriteFile("sizeclasses.go", out, 0666)
}
if err != nil {
log.Fatal(err)
}
}
const (
// Constants that we use and will transfer to the runtime.
maxSmallSize = 32 << 10
smallSizeDiv = 8
smallSizeMax = 1024
largeSizeDiv = 128
pageShift = 13
// Derived constants.
pageSize = 1 << pageShift
)
type class struct {
size int // max size
npages int // number of pages
}
func powerOfTwo(x int) bool {
return x != 0 && x&(x-1) == 0
}
func makeClasses() []class {
var classes []class
classes = append(classes, class{}) // class #0 is a dummy entry
align := 8
for size := align; size <= maxSmallSize; size += align {
if powerOfTwo(size) { // bump alignment once in a while
if size >= 2048 {
align = 256
} else if size >= 128 {
align = size / 8
} else if size >= 32 {
align = 16 // heap bitmaps assume 16 byte alignment for allocations >= 32 bytes.
}
}
if !powerOfTwo(align) {
panic("incorrect alignment")
}
// Make the allocnpages big enough that
// the leftover is less than 1/8 of the total,
// so wasted space is at most 12.5%.
allocsize := pageSize
for allocsize%size > allocsize/8 {
allocsize += pageSize
}
npages := allocsize / pageSize
// If the previous sizeclass chose the same
// allocation size and fit the same number of
// objects into the page, we might as well
// use just this size instead of having two
// different sizes.
if len(classes) > 1 && npages == classes[len(classes)-1].npages && allocsize/size == allocsize/classes[len(classes)-1].size {
classes[len(classes)-1].size = size
continue
}
classes = append(classes, class{size: size, npages: npages})
}
// Increase object sizes if we can fit the same number of larger objects
// into the same number of pages. For example, we choose size 8448 above
// with 6 objects in 7 pages. But we can well use object size 9472,
// which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
// We need to preserve at least largeSizeDiv alignment otherwise
// sizeToClass won't work.
for i := range classes {
if i == 0 {
continue
}
c := &classes[i]
psize := c.npages * pageSize
new_size := (psize / (psize / c.size)) &^ (largeSizeDiv - 1)
if new_size > c.size {
c.size = new_size
}
}
if len(classes) != 68 {
panic("number of size classes has changed")
}
for i := range classes {
computeDivMagic(&classes[i])
}
return classes
}
// computeDivMagic checks that the division required to compute object
// index from span offset can be computed using 32-bit multiplication.
// n / c.size is implemented as (n * (^uint32(0)/uint32(c.size) + 1)) >> 32
// for all 0 <= n <= c.npages * pageSize
func computeDivMagic(c *class) {
// divisor
d := c.size
if d == 0 {
return
}
// maximum input value for which the formula needs to work.
max := c.npages * pageSize
// As reported in [1], if n and d are unsigned N-bit integers, we
// can compute n / d as ⌊n * c / 2^F⌋, where c is ⌈2^F / d⌉ and F is
// computed with:
//
// Algorithm 2: Algorithm to select the number of fractional bits
// and the scaled approximate reciprocal in the case of unsigned
// integers.
//
// if d is a power of two then
// Let F ← log₂(d) and c = 1.
// else
// Let F ← N + L where L is the smallest integer
// such that d ≤ (2^(N+L) mod d) + 2^L.
// end if
//
// [1] "Faster Remainder by Direct Computation: Applications to
// Compilers and Software Libraries" Daniel Lemire, Owen Kaser,
// Nathan Kurz arXiv:1902.01961
//
// To minimize the risk of introducing errors, we implement the
// algorithm exactly as stated, rather than trying to adapt it to
// fit typical Go idioms.
N := bits.Len(uint(max))
var F int
if powerOfTwo(d) {
F = int(math.Log2(float64(d)))
if d != 1<<F {
panic("imprecise log2")
}
} else {
for L := 0; ; L++ {
if d <= ((1<<(N+L))%d)+(1<<L) {
F = N + L
break
}
}
}
// Also, noted in the paper, F is the smallest number of fractional
// bits required. We use 32 bits, because it works for all size
// classes and is fast on all CPU architectures that we support.
if F > 32 {
fmt.Printf("d=%d max=%d N=%d F=%d\n", c.size, max, N, F)
panic("size class requires more than 32 bits of precision")
}
// Brute force double-check with the exact computation that will be
// done by the runtime.
m := ^uint32(0)/uint32(c.size) + 1
for n := 0; n <= max; n++ {
if uint32((uint64(n)*uint64(m))>>32) != uint32(n/c.size) {
fmt.Printf("d=%d max=%d m=%d n=%d\n", d, max, m, n)
panic("bad 32-bit multiply magic")
}
}
}
func printComment(w io.Writer, classes []class) {
fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-10s %-9s %-9s\n", "class", "bytes/obj", "bytes/span", "objects", "tail waste", "max waste", "min align")
prevSize := 0
var minAligns [pageShift + 1]int
for i, c := range classes {
if i == 0 {
continue
}
spanSize := c.npages * pageSize
objects := spanSize / c.size
tailWaste := spanSize - c.size*(spanSize/c.size)
maxWaste := float64((c.size-prevSize-1)*objects+tailWaste) / float64(spanSize)
alignBits := bits.TrailingZeros(uint(c.size))
if alignBits > pageShift {
// object alignment is capped at page alignment
alignBits = pageShift
}
for i := range minAligns {
if i > alignBits {
minAligns[i] = 0
} else if minAligns[i] == 0 {
minAligns[i] = c.size
}
}
prevSize = c.size
fmt.Fprintf(w, "// %5d %9d %10d %7d %10d %8.2f%% %9d\n", i, c.size, spanSize, objects, tailWaste, 100*maxWaste, 1<<alignBits)
}
fmt.Fprintf(w, "\n")
fmt.Fprintf(w, "// %-9s %-4s %-12s\n", "alignment", "bits", "min obj size")
for bits, size := range minAligns {
if size == 0 {
break
}
if bits+1 < len(minAligns) && size == minAligns[bits+1] {
continue
}
fmt.Fprintf(w, "// %9d %4d %12d\n", 1<<bits, bits, size)
}
fmt.Fprintf(w, "\n")
}
func printClasses(w io.Writer, classes []class) {
fmt.Fprintln(w, "const (")
fmt.Fprintf(w, "_MaxSmallSize = %d\n", maxSmallSize)
fmt.Fprintf(w, "smallSizeDiv = %d\n", smallSizeDiv)
fmt.Fprintf(w, "smallSizeMax = %d\n", smallSizeMax)
fmt.Fprintf(w, "largeSizeDiv = %d\n", largeSizeDiv)
fmt.Fprintf(w, "_NumSizeClasses = %d\n", len(classes))
fmt.Fprintf(w, "_PageShift = %d\n", pageShift)
fmt.Fprintln(w, ")")
fmt.Fprint(w, "var class_to_size = [_NumSizeClasses]uint16 {")
for _, c := range classes {
fmt.Fprintf(w, "%d,", c.size)
}
fmt.Fprintln(w, "}")
fmt.Fprint(w, "var class_to_allocnpages = [_NumSizeClasses]uint8 {")
for _, c := range classes {
fmt.Fprintf(w, "%d,", c.npages)
}
fmt.Fprintln(w, "}")
fmt.Fprint(w, "var class_to_divmagic = [_NumSizeClasses]uint32 {")
for _, c := range classes {
if c.size == 0 {
fmt.Fprintf(w, "0,")
continue
}
fmt.Fprintf(w, "^uint32(0)/%d+1,", c.size)
}
fmt.Fprintln(w, "}")
// map from size to size class, for small sizes.
sc := make([]int, smallSizeMax/smallSizeDiv+1)
for i := range sc {
size := i * smallSizeDiv
for j, c := range classes {
if c.size >= size {
sc[i] = j
break
}
}
}
fmt.Fprint(w, "var size_to_class8 = [smallSizeMax/smallSizeDiv+1]uint8 {")
for _, v := range sc {
fmt.Fprintf(w, "%d,", v)
}
fmt.Fprintln(w, "}")
// map from size to size class, for large sizes.
sc = make([]int, (maxSmallSize-smallSizeMax)/largeSizeDiv+1)
for i := range sc {
size := smallSizeMax + i*largeSizeDiv
for j, c := range classes {
if c.size >= size {
sc[i] = j
break
}
}
}
fmt.Fprint(w, "var size_to_class128 = [(_MaxSmallSize-smallSizeMax)/largeSizeDiv+1]uint8 {")
for _, v := range sc {
fmt.Fprintf(w, "%d,", v)
}
fmt.Fprintln(w, "}")
}
|