Diffstat (limited to 'src/runtime/mcache.go')
-rw-r--r-- | src/runtime/mcache.go | 331
1 file changed, 331 insertions, 0 deletions
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
new file mode 100644
index 0000000..d4b6eef
--- /dev/null
+++ b/src/runtime/mcache.go
@@ -0,0 +1,331 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"runtime/internal/atomic"
+	"runtime/internal/sys"
+	"unsafe"
+)
+
+// Per-thread (in Go, per-P) cache for small objects.
+// This includes a small object cache and local allocation stats.
+// No locking needed because it is per-thread (per-P).
+//
+// mcaches are allocated from non-GC'd memory, so any heap pointers
+// must be specially handled.
+type mcache struct {
+	_ sys.NotInHeap
+
+	// The following members are accessed on every malloc,
+	// so they are grouped here for better caching.
+	nextSample uintptr // trigger heap sample after allocating this many bytes
+	scanAlloc  uintptr // bytes of scannable heap allocated
+
+	// Allocator cache for tiny objects w/o pointers.
+	// See "Tiny allocator" comment in malloc.go.
+
+	// tiny points to the beginning of the current tiny block, or
+	// nil if there is no current tiny block.
+	//
+	// tiny is a heap pointer. Since mcache is in non-GC'd memory,
+	// we handle it by clearing it in releaseAll during mark
+	// termination.
+	//
+	// tinyAllocs is the number of tiny allocations performed
+	// by the P that owns this mcache.
+	tiny       uintptr
+	tinyoffset uintptr
+	tinyAllocs uintptr
+
+	// The rest is not accessed on every malloc.
+
+	alloc [numSpanClasses]*mspan // spans to allocate from, indexed by spanClass
+
+	stackcache [_NumStackOrders]stackfreelist
+
+	// flushGen indicates the sweepgen during which this mcache
+	// was last flushed. If flushGen != mheap_.sweepgen, the spans
+	// in this mcache are stale and need to be flushed so they
+	// can be swept. This is done in acquirep.
+	flushGen atomic.Uint32
+}
+
+// A gclink is a node in a linked list of blocks, like mlink,
+// but it is opaque to the garbage collector.
+// The GC does not trace the pointers during collection,
+// and the compiler does not emit write barriers for assignments
+// of gclinkptr values. Code should store references to gclinks
+// as gclinkptr, not as *gclink.
+type gclink struct {
+	next gclinkptr
+}
+
+// A gclinkptr is a pointer to a gclink, but it is opaque
+// to the garbage collector.
+type gclinkptr uintptr
+
+// ptr returns the *gclink form of p.
+// The result should be used for accessing fields, not stored
+// in other data structures.
+func (p gclinkptr) ptr() *gclink {
+	return (*gclink)(unsafe.Pointer(p))
+}
+
+type stackfreelist struct {
+	list gclinkptr // linked list of free stacks
+	size uintptr   // total size of stacks in list
+}
+
+// dummy mspan that contains no free objects.
+var emptymspan mspan
+
+func allocmcache() *mcache {
+	var c *mcache
+	systemstack(func() {
+		lock(&mheap_.lock)
+		c = (*mcache)(mheap_.cachealloc.alloc())
+		c.flushGen.Store(mheap_.sweepgen)
+		unlock(&mheap_.lock)
+	})
+	for i := range c.alloc {
+		c.alloc[i] = &emptymspan
+	}
+	c.nextSample = nextSample()
+	return c
+}
+
+// freemcache releases resources associated with this
+// mcache and puts the object onto a free list.
+//
+// In some cases there is no way to simply release
+// resources, such as statistics, so donate them to
+// a different mcache (the recipient).
+func freemcache(c *mcache) {
+	systemstack(func() {
+		c.releaseAll()
+		stackcache_clear(c)
+
+		// NOTE(rsc,rlh): If gcworkbuffree comes back, we need to coordinate
+		// with the stealing of gcworkbufs during garbage collection to avoid
+		// a race where the workbuf is double-freed.
+		// gcworkbuffree(c.gcworkbuf)
+
+		lock(&mheap_.lock)
+		mheap_.cachealloc.free(unsafe.Pointer(c))
+		unlock(&mheap_.lock)
+	})
+}
+
+// getMCache is a convenience function which tries to obtain an mcache.
+//
+// Returns nil if we're not bootstrapping and we don't have a P. The caller's
+// P must not change, so we must be in a non-preemptible state.
+func getMCache(mp *m) *mcache {
+	// Grab the mcache, since that's where stats live.
+	pp := mp.p.ptr()
+	var c *mcache
+	if pp == nil {
+		// We will be called without a P while bootstrapping,
+		// in which case we use mcache0, which is set in mallocinit.
+		// mcache0 is cleared when bootstrapping is complete,
+		// by procresize.
+		c = mcache0
+	} else {
+		c = pp.mcache
+	}
+	return c
+}
+
+// refill acquires a new span of span class spc for c. This span will
+// have at least one free object. The current span in c must be full.
+//
+// Must run in a non-preemptible context since otherwise the owner of
+// c could change.
+func (c *mcache) refill(spc spanClass) {
+	// Return the current cached span to the central lists.
+	s := c.alloc[spc]
+
+	if s.allocCount != s.nelems {
+		throw("refill of span with free space remaining")
+	}
+	if s != &emptymspan {
+		// Mark this span as no longer cached.
+		if s.sweepgen != mheap_.sweepgen+3 {
+			throw("bad sweepgen in refill")
+		}
+		mheap_.central[spc].mcentral.uncacheSpan(s)
+
+		// Count up how many slots were used and record it.
+		stats := memstats.heapStats.acquire()
+		slotsUsed := int64(s.allocCount) - int64(s.allocCountBeforeCache)
+		atomic.Xadd64(&stats.smallAllocCount[spc.sizeclass()], slotsUsed)
+
+		// Flush tinyAllocs.
+		if spc == tinySpanClass {
+			atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
+			c.tinyAllocs = 0
+		}
+		memstats.heapStats.release()
+
+		// Count the allocs in inconsistent, internal stats.
+		bytesAllocated := slotsUsed * int64(s.elemsize)
+		gcController.totalAlloc.Add(bytesAllocated)
+
+		// Clear the second allocCount just to be safe.
+		s.allocCountBeforeCache = 0
+	}
+
+	// Get a new cached span from the central lists.
+	s = mheap_.central[spc].mcentral.cacheSpan()
+	if s == nil {
+		throw("out of memory")
+	}
+
+	if s.allocCount == s.nelems {
+		throw("span has no free space")
+	}
+
+	// Indicate that this span is cached and prevent asynchronous
+	// sweeping in the next sweep phase.
+	s.sweepgen = mheap_.sweepgen + 3
+
+	// Store the current alloc count for accounting later.
+	s.allocCountBeforeCache = s.allocCount
+
+	// Update heapLive and flush scanAlloc.
+	//
+	// We have not yet allocated anything new into the span, but we
+	// assume that all of its slots will get used, so this makes
+	// heapLive an overestimate.
+	//
+	// When the span gets uncached, we'll fix up this overestimate
+	// if necessary (see releaseAll).
+	//
+	// We pick an overestimate here because an underestimate leads
+	// the pacer to believe that it's in better shape than it is,
+	// which appears to lead to more memory used. See #53738 for
+	// more details.
+	usedBytes := uintptr(s.allocCount) * s.elemsize
+	gcController.update(int64(s.npages*pageSize)-int64(usedBytes), int64(c.scanAlloc))
+	c.scanAlloc = 0
+
+	c.alloc[spc] = s
+}
+
+// allocLarge allocates a span for a large object.
+func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan {
+	if size+_PageSize < size {
+		throw("out of memory")
+	}
+	npages := size >> _PageShift
+	if size&_PageMask != 0 {
+		npages++
+	}
+
+	// Deduct credit for this span allocation and sweep if
+	// necessary. mHeap_Alloc will also sweep npages, so this only
+	// pays the debt down to npage pages.
+	deductSweepCredit(npages*_PageSize, npages)
+
+	spc := makeSpanClass(0, noscan)
+	s := mheap_.alloc(npages, spc)
+	if s == nil {
+		throw("out of memory")
+	}
+
+	// Count the alloc in consistent, external stats.
+	stats := memstats.heapStats.acquire()
+	atomic.Xadd64(&stats.largeAlloc, int64(npages*pageSize))
+	atomic.Xadd64(&stats.largeAllocCount, 1)
+	memstats.heapStats.release()
+
+	// Count the alloc in inconsistent, internal stats.
+	gcController.totalAlloc.Add(int64(npages * pageSize))
+
+	// Update heapLive.
+	gcController.update(int64(s.npages*pageSize), 0)
+
+	// Put the large span in the mcentral swept list so that it's
+	// visible to the background sweeper.
+	mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s)
+	s.limit = s.base() + size
+	s.initHeapBits(false)
+	return s
+}
+
+func (c *mcache) releaseAll() {
+	// Take this opportunity to flush scanAlloc.
+	scanAlloc := int64(c.scanAlloc)
+	c.scanAlloc = 0
+
+	sg := mheap_.sweepgen
+	dHeapLive := int64(0)
+	for i := range c.alloc {
+		s := c.alloc[i]
+		if s != &emptymspan {
+			slotsUsed := int64(s.allocCount) - int64(s.allocCountBeforeCache)
+			s.allocCountBeforeCache = 0
+
+			// Adjust smallAllocCount for whatever was allocated.
+			stats := memstats.heapStats.acquire()
+			atomic.Xadd64(&stats.smallAllocCount[spanClass(i).sizeclass()], slotsUsed)
+			memstats.heapStats.release()
+
+			// Adjust the actual allocs in inconsistent, internal stats.
+			// We assumed earlier that the full span gets allocated.
+			gcController.totalAlloc.Add(slotsUsed * int64(s.elemsize))
+
+			if s.sweepgen != sg+1 {
+				// refill conservatively counted unallocated slots in gcController.heapLive.
+				// Undo this.
+				//
+				// If this span was cached before sweep, then gcController.heapLive was totally
+				// recomputed since caching this span, so we don't do this for stale spans.
+				dHeapLive -= int64(s.nelems-s.allocCount) * int64(s.elemsize)
+			}
+
+			// Release the span to the mcentral.
+			mheap_.central[i].mcentral.uncacheSpan(s)
+			c.alloc[i] = &emptymspan
+		}
+	}
+	// Clear tinyalloc pool.
+	c.tiny = 0
+	c.tinyoffset = 0
+
+	// Flush tinyAllocs.
+	stats := memstats.heapStats.acquire()
+	atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
+	c.tinyAllocs = 0
+	memstats.heapStats.release()
+
+	// Update heapLive and heapScan.
+	gcController.update(dHeapLive, scanAlloc)
+}
+
+// prepareForSweep flushes c if the system has entered a new sweep phase
+// since c was populated. This must happen between the sweep phase
+// starting and the first allocation from c.
+func (c *mcache) prepareForSweep() {
+	// Alternatively, instead of making sure we do this on every P
+	// between starting the world and allocating on that P, we
+	// could leave allocate-black on, allow allocation to continue
+	// as usual, use a ragged barrier at the beginning of sweep to
+	// ensure all cached spans are swept, and then disable
+	// allocate-black. However, with this approach it's difficult
+	// to avoid spilling mark bits into the *next* GC cycle.
+	sg := mheap_.sweepgen
+	flushGen := c.flushGen.Load()
+	if flushGen == sg {
+		return
+	} else if flushGen != sg-2 {
+		println("bad flushGen", flushGen, "in prepareForSweep; sweepgen", sg)
+		throw("bad flushGen")
+	}
+	c.releaseAll()
+	stackcache_clear(c)
+	c.flushGen.Store(mheap_.sweepgen) // Synchronizes with gcStart
+}
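
The tiny and tinyoffset fields in the mcache struct above back a bump-pointer cache for small, pointer-free objects: requests are packed into one small block and a new block is started only when the current one cannot fit the request. The following is a minimal, self-contained sketch of that scheme, not the runtime's code; the tinyCache type, its alloc method, and the tinyBlockSize constant are illustrative names, loosely mirroring the alignment rules described in malloc.go's "Tiny allocator" comment.

package main

import "fmt"

const tinyBlockSize = 16 // assumed block size, standing in for maxTinySize in malloc.go

// tinyCache is an illustrative stand-in for the tiny/tinyoffset pair in mcache.
type tinyCache struct {
	offset uintptr // next free byte within the current tiny block
}

// alloc aligns the offset the way the tiny path does and returns the offset
// handed out, or false if the current block cannot fit size and a fresh
// block must be started.
func (c *tinyCache) alloc(size uintptr) (uintptr, bool) {
	off := c.offset
	switch {
	case size&7 == 0:
		off = (off + 7) &^ 7 // 8-byte align 8-byte requests
	case size&3 == 0:
		off = (off + 3) &^ 3 // 4-byte align 4-byte requests
	case size&1 == 0:
		off = (off + 1) &^ 1 // 2-byte align even requests
	}
	if off+size > tinyBlockSize {
		return 0, false // caller starts a new tiny block
	}
	c.offset = off + size
	return off, true
}

func main() {
	var c tinyCache
	for _, size := range []uintptr{1, 4, 8, 3} {
		off, ok := c.alloc(size)
		fmt.Printf("size %d -> offset %d, fits %v\n", size, off, ok)
	}
}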
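refill and releaseAll cooperate on the gcController.heapLive accounting: refill optimistically credits the cached span's whole page footprint minus the bytes already in use, and releaseAll later subtracts the slots that were counted but never allocated (for spans that did not go stale in the meantime). A small worked sketch of that arithmetic, with made-up span parameters rather than real runtime size classes:

package main

import "fmt"

func main() {
	// Illustrative span parameters; not taken from the runtime's size classes.
	const (
		pageSize   = 8192 // bytes per page
		npages     = 1    // pages backing the cached span
		elemsize   = 48   // bytes per object slot
		nelems     = 170  // object slots in the span
		allocCount = 20   // slots already in use when refill caches the span
	)

	// refill: credit the span's full footprint minus the bytes already in
	// use, i.e. assume every remaining slot is about to become live.
	creditedByRefill := int64(npages*pageSize) - int64(allocCount*elemsize)
	fmt.Println("heapLive credited by refill:", creditedByRefill)

	// By the time releaseAll returns the span, only some slots were used.
	const finalAllocCount = 50

	// releaseAll: for spans that were not re-swept in the meantime,
	// subtract the slots that were counted but never allocated.
	correction := int64(nelems-finalAllocCount) * elemsize
	fmt.Println("heapLive correction in releaseAll:", -correction)
}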
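allocLarge starts by rounding the requested size up to a whole number of pages, with the size+_PageSize overflow check guarding against a request so large that rounding would wrap around. A standalone sketch of that arithmetic; the roundToPages function and the pageShift/pageSize/pageMask constants here are illustrative stand-ins for the runtime's _PageShift/_PageSize/_PageMask.

package main

import "fmt"

// Illustrative stand-ins for the runtime's page constants.
const (
	pageShift = 13
	pageSize  = 1 << pageShift
	pageMask  = pageSize - 1
)

// roundToPages reports how many whole pages a request of the given size
// needs, and false if rounding the request up would overflow uintptr.
func roundToPages(size uintptr) (uintptr, bool) {
	if size+pageSize < size { // same overflow guard as allocLarge
		return 0, false
	}
	npages := size >> pageShift
	if size&pageMask != 0 {
		npages++ // a partial trailing page still costs a whole page
	}
	return npages, true
}

func main() {
	for _, size := range []uintptr{pageSize, pageSize + 1, 100 << 20} {
		npages, ok := roundToPages(size)
		fmt.Printf("size %d -> %d pages (ok=%v)\n", size, npages, ok)
	}
}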
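prepareForSweep accepts exactly two flushGen values: the current sweepgen (the cache was already flushed this cycle) and sweepgen-2 (the cache is one GC cycle stale, since sweepgen advances by two per cycle); anything else is treated as a fatal error. A small sketch of that check pulled out into a standalone function; needsFlush is a hypothetical name, not a runtime API.

package main

import "fmt"

// needsFlush classifies a cache's flushGen against the heap's current
// sweepgen the way prepareForSweep does: equal means already flushed this
// cycle, exactly two behind means one cycle stale, anything else is a bug.
func needsFlush(flushGen, sweepgen uint32) (bool, error) {
	switch flushGen {
	case sweepgen:
		return false, nil // nothing to do
	case sweepgen - 2:
		return true, nil // release cached spans and re-stamp flushGen
	default:
		return false, fmt.Errorf("bad flushGen %d; sweepgen %d", flushGen, sweepgen)
	}
}

func main() {
	for _, fg := range []uint32{10, 8, 6} {
		flush, err := needsFlush(fg, 10)
		fmt.Printf("flushGen=%d sweepgen=10 -> flush=%v err=%v\n", fg, flush, err)
	}
}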