src/internal/trace/v2/testtrace/validation.go


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361

// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package testtrace

import (
	"errors"
	"fmt"
	"internal/trace/v2"
	"slices"
	"strings"
)

// Validator is a type used for validating a stream of trace.Events.
type Validator struct {
	lastTs   trace.Time
	gs       map[trace.GoID]*goState
	ps       map[trace.ProcID]*procState
	ms       map[trace.ThreadID]*schedContext
	ranges   map[trace.ResourceID][]string
	tasks    map[trace.TaskID]string
	seenSync bool
}

type schedContext struct {
	M trace.ThreadID
	P trace.ProcID
	G trace.GoID
}

type goState struct {
	state   trace.GoState
	binding *schedContext
}

type procState struct {
	state   trace.ProcState
	binding *schedContext
}

// NewValidator creates a new Validator.
func NewValidator() *Validator {
	return &Validator{
		gs:     make(map[trace.GoID]*goState),
		ps:     make(map[trace.ProcID]*procState),
		ms:     make(map[trace.ThreadID]*schedContext),
		ranges: make(map[trace.ResourceID][]string),
		tasks:  make(map[trace.TaskID]string),
	}
}

// Event validates ev as the next event in a stream of trace.Events.
//
// Returns an error if validation fails.
func (v *Validator) Event(ev trace.Event) error {
	e := new(errAccumulator)

	// Validate timestamp order.
	if v.lastTs != 0 {
		if ev.Time() <= v.lastTs {
			e.Errorf("timestamp out-of-order for %+v", ev)
		} else {
			v.lastTs = ev.Time()
		}
	} else {
		v.lastTs = ev.Time()
	}

	// Validate event stack.
	checkStack(e, ev.Stack())

	switch ev.Kind() {
	case trace.EventSync:
		// Just record that we've seen a Sync at some point.
		v.seenSync = true
	case trace.EventMetric:
		m := ev.Metric()
		if !strings.Contains(m.Name, ":") {
			// Should have a ":" as per runtime/metrics convention.
			e.Errorf("invalid metric name %q", m.Name)
		}
		// Make sure the value is OK.
		if m.Value.Kind() == trace.ValueBad {
			e.Errorf("invalid value")
		}
		switch m.Value.Kind() {
		case trace.ValueUint64:
			// Just make sure it doesn't panic.
			_ = m.Value.Uint64()
		}
	case trace.EventLabel:
		l := ev.Label()

		// Check label.
		if l.Label == "" {
			e.Errorf("invalid label %q", l.Label)
		}

		// Check label resource.
		if l.Resource.Kind == trace.ResourceNone {
			e.Errorf("label resource none")
		}
		switch l.Resource.Kind {
		case trace.ResourceGoroutine:
			id := l.Resource.Goroutine()
			if _, ok := v.gs[id]; !ok {
				e.Errorf("label for invalid goroutine %d", id)
			}
		case trace.ResourceProc:
			id := l.Resource.Proc()
			if _, ok := v.ps[id]; !ok {
				e.Errorf("label for invalid proc %d", id)
			}
		case trace.ResourceThread:
			id := l.Resource.Thread()
			if _, ok := v.ms[id]; !ok {
				e.Errorf("label for invalid thread %d", id)
			}
		}
	case trace.EventStackSample:
		// Not much to check here. It's basically a sched context and a stack.
		// The sched context is also not guaranteed to align with other events.
		// We already checked the stack above.
	case trace.EventStateTransition:
		// Validate state transitions.
		//
		// TODO(mknyszek): A lot of logic is duplicated between goroutines and procs.
		// The two are intentionally handled identically; from the perspective of the
		// API, resources all have the same general properties. Consider making this
		// code generic over resources and implementing validation just once.
		tr := ev.StateTransition()
		checkStack(e, tr.Stack)
		switch tr.Resource.Kind {
		case trace.ResourceGoroutine:
			// Basic state transition validation.
			id := tr.Resource.Goroutine()
			old, new := tr.Goroutine()
			if new == trace.GoUndetermined {
				e.Errorf("transition to undetermined state for goroutine %d", id)
			}
			if v.seenSync && old == trace.GoUndetermined {
				e.Errorf("undetermined goroutine %d after first global sync", id)
			}
			if new == trace.GoNotExist && v.hasAnyRange(trace.MakeResourceID(id)) {
				e.Errorf("goroutine %d died with active ranges", id)
			}
			state, ok := v.gs[id]
			if ok {
				if old != state.state {
					e.Errorf("bad old state for goroutine %d: got %s, want %s", id, old, state.state)
				}
				state.state = new
			} else {
				if old != trace.GoUndetermined && old != trace.GoNotExist {
					e.Errorf("bad old state for unregistered goroutine %d: %s", id, old)
				}
				state = &goState{state: new}
				v.gs[id] = state
			}
			// Validate sched context.
			if new.Executing() {
				ctx := v.getOrCreateThread(e, ev.Thread())
				if ctx != nil {
					if ctx.G != trace.NoGoroutine && ctx.G != id {
						e.Errorf("tried to run goroutine %d when one was already executing (%d) on thread %d", id, ctx.G, ev.Thread())
					}
					ctx.G = id
					state.binding = ctx
				}
			} else if old.Executing() && !new.Executing() {
				if tr.Stack != ev.Stack() {
					// This is a case where the transition is happening to a goroutine that is also executing, so
					// these two stacks should always match.
					e.Errorf("StateTransition.Stack doesn't match Event.Stack")
				}
				ctx := state.binding
				if ctx != nil {
					if ctx.G != id {
						e.Errorf("tried to stop goroutine %d when it wasn't currently executing (currently executing %d) on thread %d", id, ctx.G, ev.Thread())
					}
					ctx.G = trace.NoGoroutine
					state.binding = nil
				} else {
					e.Errorf("stopping goroutine %d not bound to any active context", id)
				}
			}
		case trace.ResourceProc:
			// Basic state transition validation.
			id := tr.Resource.Proc()
			old, new := tr.Proc()
			if new == trace.ProcUndetermined {
				e.Errorf("transition to undetermined state for proc %d", id)
			}
			if v.seenSync && old == trace.ProcUndetermined {
				e.Errorf("undetermined proc %d after first global sync", id)
			}
			if new == trace.ProcNotExist && v.hasAnyRange(trace.MakeResourceID(id)) {
				e.Errorf("proc %d died with active ranges", id)
			}
			state, ok := v.ps[id]
			if ok {
				if old != state.state {
					e.Errorf("bad old state for proc %d: got %s, want %s", id, old, state.state)
				}
				state.state = new
			} else {
				if old != trace.ProcUndetermined && old != trace.ProcNotExist {
					e.Errorf("bad old state for unregistered proc %d: %s", id, old)
				}
				state = &procState{state: new}
				v.ps[id] = state
			}
			// Validate sched context.
			if new.Executing() {
				ctx := v.getOrCreateThread(e, ev.Thread())
				if ctx != nil {
					if ctx.P != trace.NoProc && ctx.P != id {
						e.Errorf("tried to run proc %d when one was already executing (%d) on thread %d", id, ctx.P, ev.Thread())
					}
					ctx.P = id
					state.binding = ctx
				}
			} else if old.Executing() && !new.Executing() {
				ctx := state.binding
				if ctx != nil {
					if ctx.P != id {
						e.Errorf("tried to stop proc %d when it wasn't currently executing (currently executing %d) on thread %d", id, ctx.P, ctx.M)
					}
					ctx.P = trace.NoProc
					state.binding = nil
				} else {
					e.Errorf("stopping proc %d not bound to any active context", id)
				}
			}
		}
	case trace.EventRangeBegin, trace.EventRangeActive, trace.EventRangeEnd:
		// Validate ranges.
		r := ev.Range()
		switch ev.Kind() {
		case trace.EventRangeBegin:
			if v.hasRange(r.Scope, r.Name) {
				e.Errorf("already active range %q on %v begun again", r.Name, r.Scope)
			}
			v.addRange(r.Scope, r.Name)
		case trace.EventRangeActive:
			if !v.hasRange(r.Scope, r.Name) {
				v.addRange(r.Scope, r.Name)
			}
		case trace.EventRangeEnd:
			if !v.hasRange(r.Scope, r.Name) {
				e.Errorf("inactive range %q on %v ended", r.Name, r.Scope)
			}
			v.deleteRange(r.Scope, r.Name)
		}
	case trace.EventTaskBegin:
		// Validate task begin.
		t := ev.Task()
		if t.ID == trace.NoTask || t.ID == trace.BackgroundTask {
			// The background task should never have an event emitted for it.
			e.Errorf("found invalid task ID for task of type %s", t.Type)
		}
		if t.Parent == trace.BackgroundTask {
			// It's not possible for a task to be a subtask of the background task.
			e.Errorf("found background task as the parent for task of type %s", t.Type)
		}
		// N.B. Don't check the task type. Empty string is a valid task type.
		v.tasks[t.ID] = t.Type
	case trace.EventTaskEnd:
		// Validate task end.
		// We can see a task end without a begin, so ignore a task without information.
		// Instead, if we've seen the task begin, just make sure the task end lines up.
		t := ev.Task()
		if typ, ok := v.tasks[t.ID]; ok {
			if t.Type != typ {
				e.Errorf("task end type %q doesn't match task start type %q for task %d", t.Type, typ, t.ID)
			}
			delete(v.tasks, t.ID)
		}
	case trace.EventLog:
		// There's really not much here to check, except that we can
		// generate a Log. The category and message are entirely user-created,
		// so we can't make any assumptions as to what they are. We also
		// can't validate the task, because proving the task's existence is very
		// much best-effort.
		_ = ev.Log()
	}
	return e.Errors()
}

func (v *Validator) hasRange(r trace.ResourceID, name string) bool {
	ranges, ok := v.ranges[r]
	return ok && slices.Contains(ranges, name)
}

func (v *Validator) addRange(r trace.ResourceID, name string) {
	ranges, _ := v.ranges[r]
	ranges = append(ranges, name)
	v.ranges[r] = ranges
}

func (v *Validator) hasAnyRange(r trace.ResourceID) bool {
	ranges, ok := v.ranges[r]
	return ok && len(ranges) != 0
}

func (v *Validator) deleteRange(r trace.ResourceID, name string) {
	ranges, ok := v.ranges[r]
	if !ok {
		return
	}
	i := slices.Index(ranges, name)
	if i < 0 {
		return
	}
	v.ranges[r] = slices.Delete(ranges, i, i+1)
}

func (v *Validator) getOrCreateThread(e *errAccumulator, m trace.ThreadID) *schedContext {
	if m == trace.NoThread {
		e.Errorf("must have thread, but thread ID is none")
		return nil
	}
	s, ok := v.ms[m]
	if !ok {
		s = &schedContext{M: m, P: trace.NoProc, G: trace.NoGoroutine}
		v.ms[m] = s
		return s
	}
	return s
}

func checkStack(e *errAccumulator, stk trace.Stack) {
	// Check for non-empty values, but we also check for crashes due to incorrect validation.
	i := 0
	stk.Frames(func(f trace.StackFrame) bool {
		if i == 0 {
			// Allow for one fully zero stack.
			//
			// TODO(mknyszek): Investigate why that happens.
			return true
		}
		if f.Func == "" || f.File == "" || f.PC == 0 || f.Line == 0 {
			e.Errorf("invalid stack frame %#v: missing information", f)
		}
		i++
		return true
	})
}

type errAccumulator struct {
	errs []error
}

func (e *errAccumulator) Errorf(f string, args ...any) {
	e.errs = append(e.errs, fmt.Errorf(f, args...))
}

func (e *errAccumulator) Errors() error {
	return errors.Join(e.errs...)
}