path: root/src/cmd/compile/internal/ssa/loopreschedchecks.go
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/types"
	"fmt"
)

// an edgeMem records a backedge, together with the memory
// phi functions at the target of the backedge that must
// be updated when a rescheduling check replaces the backedge.
type edgeMem struct {
	e Edge
	m *Value // phi for memory at dest of e
}

// a rewriteTarget is a value-argindex pair indicating
// where a rewrite is applied.  Note that this is for values,
// not for block controls, because block controls are not targets
// for the rewrites performed in inserting rescheduling checks.
type rewriteTarget struct {
	v *Value
	i int
}

type rewrite struct {
	before, after *Value          // before is the expected value before rewrite, after is the new value installed.
	rewrites      []rewriteTarget // all the targets for this rewrite.
}

func (r *rewrite) String() string {
	s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
	for _, rw := range r.rewrites {
		s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
	}
	s += "\n"
	return s
}

// insertLoopReschedChecks inserts rescheduling checks on loop backedges.
func insertLoopReschedChecks(f *Func) {
	// TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.

	// Loop reschedule checks compare the stack pointer with
	// the per-g stack bound.  If the pointer appears invalid
	// (the runtime raises the bound above any real stack pointer
	// when it wants to preempt the goroutine), a reschedule is needed.
	//
	// Steps:
	// 1. locate backedges.
	// 2. Record memory definitions at block end so that
	//    the SSA graph for mem can be properly modified.
	// 3. Ensure that phi functions that will be needed for mem
	//    are present in the graph, initially with trivial inputs.
	// 4. Record all to-be-modified uses of mem;
	//    apply modifications (split into two steps to simplify and
	//    avoid nagging order-dependencies).
	// 5. Rewrite backedges to include reschedule check,
	//    and modify destination phi function appropriately with new
	//    definitions for mem.
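	//
	// As a rough sketch (illustrative only, not literal compiler output),
	// a loop
	//
	//    loop:
	//        ...body...
	//        goto loop
	//
	// ends up testing the stack bound on its backedge:
	//
	//    loop:
	//        ...body...
	//        if SP < g.stackguard0 { mem = goschedguarded(mem) }
	//        goto loop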

	if f.NoSplit { // nosplit functions don't reschedule.
		return
	}

	backedges := backedges(f)
	if len(backedges) == 0 { // no backedges means no rescheduling checks.
		return
	}

	lastMems := findLastMems(f)
	defer f.Cache.freeValueSlice(lastMems)

	idom := f.Idom()
	po := f.postorder()
	// The ordering in the dominator tree matters; it's important that
	// the walk of the dominator tree also be a preorder (i.e., a node is
	// visited only after all its non-backedge predecessors have been visited).
	sdom := newSparseOrderedTree(f, idom, po)

	if f.pass.debug > 1 {
		fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}

	tofixBackedges := []edgeMem{}

	for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
		tofixBackedges = append(tofixBackedges, edgeMem{e, nil})
	}

	// It's possible that there is no memory state (no global/pointer loads/stores or calls)
	if lastMems[f.Entry.ID] == nil {
		lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, types.TypeMem)
	}

	memDefsAtBlockEnds := f.Cache.allocValueSlice(f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.
	defer f.Cache.freeValueSlice(memDefsAtBlockEnds)

	// Propagate last mem definitions forward through successor blocks.
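	// Walking the postorder in reverse visits each block after all of its
	// non-backedge predecessors, so by the time a predecessor's entry in
	// memDefsAtBlockEnds is consulted it is already filled in; only backedge
	// predecessors can still be nil, which the inner loop below skips past.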
	for i := len(po) - 1; i >= 0; i-- {
		b := po[i]
		mem := lastMems[b.ID]
		for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
			// loop because there might be backedges that haven't been visited yet.
			mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
		}
		memDefsAtBlockEnds[b.ID] = mem
		if f.pass.debug > 2 {
			fmt.Printf("memDefsAtBlockEnds[%s] = %s\n", b, mem)
		}
	}

	// Maps from block to newly-inserted phi function in block.
	newmemphis := make(map[*Block]rewrite)

	// Insert phi functions as necessary for future changes to flow graph.
	for i, emc := range tofixBackedges {
		e := emc.e
		h := e.b

		// find the phi function for the memory input at "h", if there is one.
		var headerMemPhi *Value // look for header mem phi

		for _, v := range h.Values {
			if v.Op == OpPhi && v.Type.IsMemory() {
				headerMemPhi = v
			}
		}

		if headerMemPhi == nil {
			// The header has no mem phi yet; make a trivial one from the
			// mem def reaching the header through its immediate dominator.
			mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
			headerMemPhi = newPhiFor(h, mem0)
			newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
			addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis, sdom)

		}
		tofixBackedges[i].m = headerMemPhi

	}
	if f.pass.debug > 0 {
		for b, r := range newmemphis {
			fmt.Printf("before b=%s, rewrite=%s\n", b, r.String())
		}
	}

	// dfPhiTargets notes inputs to phis in dominance frontiers that should not
	// be rewritten as part of the dominated children of some outer rewrite.
	dfPhiTargets := make(map[rewriteTarget]bool)

	rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis, dfPhiTargets, sdom)

	if f.pass.debug > 0 {
		for b, r := range newmemphis {
			fmt.Printf("after b=%s, rewrite=%s\n", b, r.String())
		}
	}

	// Apply collected rewrites.
	for _, r := range newmemphis {
		for _, rw := range r.rewrites {
			rw.v.SetArg(rw.i, r.after)
		}
	}

	// Rewrite backedges to include reschedule checks.
	for _, emc := range tofixBackedges {
		e := emc.e
		headerMemPhi := emc.m
		h := e.b
		i := e.i
		p := h.Preds[i]
		bb := p.b
		mem0 := headerMemPhi.Args[i]
		// e is the backedge from bb to the header h (p == h.Preds[i] == Edge{bb, p.i}).
		// Because we're going to insert a rarely taken call, make sure the
		// looping edge still looks likely.
		likely := BranchLikely
		if p.i != 0 {
			likely = BranchUnlikely
		}
		if bb.Kind != BlockPlain { // backedges can be unconditional. e.g., if x { something; continue }
			bb.Likely = likely
		}

		// rewrite edge to include reschedule check
		// existing edges:
		//
		// bb.Succs[p.i] == Edge{h, i}
		// h.Preds[i] == p == Edge{bb,p.i}
		//
		// new block(s):
		// test:
		//    if sp < g.limit { goto sched }
		//    goto join
		// sched:
		//    mem1 := call resched (mem0)
		//    goto join
		// join:
		//    mem2 := phi(mem0, mem1)
		//    goto h
		//
		// and correct arg i of headerMemPhi
		//
		// EXCEPT: join block containing only phi functions is bad
		// for the register allocator.  Therefore, there is no
		// join, and branches targeting join must instead target
		// the header, and the other phi functions within header are
		// adjusted for the additional input.

		test := f.NewBlock(BlockIf)
		sched := f.NewBlock(BlockPlain)

		test.Pos = bb.Pos
		sched.Pos = bb.Pos

		// if sp < g.limit { goto sched }
		// goto header

		cfgtypes := &f.Config.Types
		pt := cfgtypes.Uintptr
		g := test.NewValue1(bb.Pos, OpGetG, pt, mem0)
		sp := test.NewValue0(bb.Pos, OpSP, pt)
		cmpOp := OpLess64U
		if pt.Size() == 4 {
			cmpOp = OpLess32U
		}
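		// The bound lives at offset 2*pt.Size() in the g struct: after the two
		// words of stack.lo and stack.hi comes stackguard0, which the runtime
		// sets to a huge value when it wants the comparison below to trigger
		// a preemption.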
		limaddr := test.NewValue1I(bb.Pos, OpOffPtr, pt, 2*pt.Size(), g)
		lim := test.NewValue2(bb.Pos, OpLoad, pt, limaddr, mem0)
		cmp := test.NewValue2(bb.Pos, cmpOp, cfgtypes.Bool, sp, lim)
		test.SetControl(cmp)

		// if true, goto sched
		test.AddEdgeTo(sched)

		// if false, rewrite edge to header.
		// do NOT remove+add, because that will perturb all the other phi functions
		// as well as messing up other edges to the header.
		test.Succs = append(test.Succs, Edge{h, i})
		h.Preds[i] = Edge{test, 1}
		headerMemPhi.SetArg(i, mem0)

		test.Likely = BranchUnlikely

		// sched:
		//    mem1 := call resched (mem0)
		//    goto header
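		// goschedguarded is the runtime entry point that yields the processor
		// like Gosched, but declines to yield in states where preemption is
		// not allowed.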
		resched := f.fe.Syslook("goschedguarded")
		call := sched.NewValue1A(bb.Pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(resched, bb.Func.ABIDefault.ABIAnalyzeTypes(nil, nil)), mem0)
		mem1 := sched.NewValue1I(bb.Pos, OpSelectN, types.TypeMem, 0, call)
		sched.AddEdgeTo(h)
		headerMemPhi.AddArg(mem1)

		bb.Succs[p.i] = Edge{test, 0}
		test.Preds = append(test.Preds, Edge{bb, p.i})

		// Must correct all the other phi functions in the header for new incoming edge.
		// Except for mem phis, it will be the same value seen on the original
		// backedge at index i.
		for _, v := range h.Values {
			if v.Op == OpPhi && v != headerMemPhi {
				v.AddArg(v.Args[i])
			}
		}
	}

	f.invalidateCFG()

	if f.pass.debug > 1 {
		sdom = newSparseTree(f, f.Idom())
		fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}
}

// newPhiFor inserts a new Phi function into b,
// with all inputs set to v.
func newPhiFor(b *Block, v *Value) *Value {
	phiV := b.NewValue0(b.Pos, OpPhi, v.Type)

	for range b.Preds {
		phiV.AddArg(v)
	}
	return phiV
}

// rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
// in block h will replace a previous definition.  Block b is the block currently being processed;
// if b has its own phi definition then it takes the place of h.
// defsForUses provides information about other definitions of the variable that are present
// (and if nil, indicates that the variable is no longer live).
// sdom must yield a preorder of the flow graph if recursively walked, root-to-children.
// The result of newSparseOrderedTree with order supplied by a dfs-postorder satisfies this
// requirement.
func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite, dfPhiTargets map[rewriteTarget]bool, sdom SparseTree) {
	// If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
	if _, ok := newphis[b]; ok {
		h = b
	}
	change := newphis[h]
	x := change.before
	y := change.after

	// Apply rewrites to this block
	if x != nil { // don't waste time on the common case of no definition.
		p := &change.rewrites
		for _, v := range b.Values {
			if v == y { // don't rewrite self -- phi inputs are handled below.
				continue
			}
			for i, w := range v.Args {
				if w != x {
					continue
				}
				tgt := rewriteTarget{v, i}

				// It's possible dominated control flow will rewrite this instead.
				// Visiting in preorder (a property of how sdom was constructed)
				// ensures that these are seen in the proper order.
				if dfPhiTargets[tgt] {
					continue
				}
				*p = append(*p, tgt)
				if f.pass.debug > 1 {
					fmt.Printf("added block target for h=%v, b=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
						h, b, x, y, v, i)
				}
			}
		}

		// Rewrite appropriate inputs of phis reached in successors
		// in dominance frontier, self, and dominated.
		// If the variable def reaching uses in b is itself defined in b, then the new phi function
		// does not reach the successors of b.  (This assumes a bit about the structure of the
		// phi use-def graph, but it's true for memory.)
		if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
			for _, e := range b.Succs {
				s := e.b

				for _, v := range s.Values {
					if v.Op == OpPhi && v.Args[e.i] == x {
						tgt := rewriteTarget{v, e.i}
						*p = append(*p, tgt)
						dfPhiTargets[tgt] = true
						if f.pass.debug > 1 {
							fmt.Printf("added phi target for h=%v, b=%v, s=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
								h, b, s, x, y, v.LongString(), e.i)
						}
						break
					}
				}
			}
		}
		newphis[h] = change
	}

	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		rewriteNewPhis(h, c, f, defsForUses, newphis, dfPhiTargets, sdom) // TODO: convert to explicit stack from recursion.
	}
}

// addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
// a new definition for variable "x" inserted at h (usually but not necessarily a phi).
// These new phis can only occur at the dominance frontier of h; block s is in the dominance
// frontier of h if h does not strictly dominate s and if s is a successor of a block b where
// either b = h or h strictly dominates b.
// These newly created phis are themselves new definitions that may require addition of their
// own trivial phi functions in their own dominance frontier, and this is handled recursively.
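// For example (a hypothetical CFG, purely for illustration): if the entry
// branches to both h and d, and h also branches to d, then d is in the
// dominance frontier of h, since h does not strictly dominate d (d is reachable
// from the entry without passing through h) yet d is a successor of h; a new
// definition in h therefore needs a phi in d.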
func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite, sdom SparseTree) {
	oldv := defForUses[b.ID]
	if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b
		return
	}
	idom := f.Idom()
outer:
	for _, e := range b.Succs {
		s := e.b
		// check phi functions in the dominance frontier
		if sdom.isAncestor(h, s) {
			continue // h dominates s, successor of b, therefore s is not in the frontier.
		}
		if _, ok := newphis[s]; ok {
			continue // successor s of b already has a new phi function, so there is no need to add another.
		}
		if x != nil {
			for _, v := range s.Values {
				if v.Op == OpPhi && v.Args[e.i] == x {
					continue outer // successor s of b has an old phi function, so there is no need to add another.
				}
			}
		}

		old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
		headerPhi := newPhiFor(s, old)
		// the new phi will replace "old" in block s and all blocks dominated by s.
		newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
		addDFphis(old, s, s, f, defForUses, newphis, sdom)  // the new definition may also create new phi functions.
	}
	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		addDFphis(x, h, c, f, defForUses, newphis, sdom) // TODO: convert to explicit stack from recursion.
	}
}

// findLastMems maps block ids to last memory-output op in a block, if any.
func findLastMems(f *Func) []*Value {

	var stores []*Value
	lastMems := f.Cache.allocValueSlice(f.NumBlocks()) // released by the caller after its last use
	storeUse := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(storeUse)
	for _, b := range f.Blocks {
		// Find all the stores in this block. Categorize their uses:
		//  storeUse contains stores which are used by a subsequent store.
		storeUse.clear()
		stores = stores[:0]
		var memPhi *Value
		for _, v := range b.Values {
			if v.Op == OpPhi {
				if v.Type.IsMemory() {
					memPhi = v
				}
				continue
			}
			if v.Type.IsMemory() {
				stores = append(stores, v)
				for _, a := range v.Args {
					if a.Block == b && a.Type.IsMemory() {
						storeUse.add(a.ID)
					}
				}
			}
		}
		if len(stores) == 0 {
			lastMems[b.ID] = memPhi
			continue
		}

		// find last store in the block
		var last *Value
		for _, v := range stores {
			if storeUse.contains(v.ID) {
				continue
			}
			if last != nil {
				b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
			}
			last = v
		}
		if last == nil {
			b.Fatalf("no last store found - cycle?")
		}

		// If this is a tuple containing a mem, select just
		// the mem. This will generate ops we don't need, but
		// it's the easiest thing to do.
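		// (Calls are the usual source: a call's Results value carries the
		// memory in its last field, hence the OpSelectN below, while older
		// tuple-typed ops carry it in their second field, hence OpSelect1.)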
		if last.Type.IsTuple() {
			last = b.NewValue1(last.Pos, OpSelect1, types.TypeMem, last)
		} else if last.Type.IsResults() {
			last = b.NewValue1I(last.Pos, OpSelectN, types.TypeMem, int64(last.Type.NumFields()-1), last)
		}

		lastMems[b.ID] = last
	}
	return lastMems
}

// markKind records the DFS exploration state of a block in backedges.
type markKind uint8

const (
	notFound    markKind = iota // block has not been discovered yet
	notExplored                 // discovered and in queue, outedges not processed yet
	explored                    // discovered and in queue, outedges processed
	done                        // all done, in output ordering
)

type backedgesState struct {
	b *Block
	i int
}

// backedges returns a slice of successor edges that are back
// edges.  For reducible loops, edge.b is the header.
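// The walk is an iterative depth-first search; an edge whose target is still
// in the notExplored state (i.e., still on the DFS stack) closes a cycle and
// is recorded as a backedge.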
func backedges(f *Func) []Edge {
	edges := []Edge{}
	mark := make([]markKind, f.NumBlocks())
	stack := []backedgesState{}

	mark[f.Entry.ID] = notExplored
	stack = append(stack, backedgesState{f.Entry, 0})

	for len(stack) > 0 {
		l := len(stack)
		x := stack[l-1]
		if x.i < len(x.b.Succs) {
			e := x.b.Succs[x.i]
			stack[l-1].i++
			s := e.b
			if mark[s.ID] == notFound {
				mark[s.ID] = notExplored
				stack = append(stack, backedgesState{s, 0})
			} else if mark[s.ID] == notExplored {
				edges = append(edges, e)
			}
		} else {
			mark[x.b.ID] = done
			stack = stack[0 : l-1]
		}
	}
	return edges
}