summaryrefslogtreecommitdiffstats
path: root/src/go/ast/commentmap.go
blob: 4196e475d93fa0bb65c34d43256ab38e44663c59 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ast

import (
	"bytes"
	"fmt"
	"go/token"
	"sort"
	"strings"
)

type byPos []*CommentGroup

func (a byPos) Len() int           { return len(a) }
func (a byPos) Less(i, j int) bool { return a[i].Pos() < a[j].Pos() }
func (a byPos) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }

// sortComments sorts the list of comment groups in source order.
func sortComments(list []*CommentGroup) {
	// TODO(gri): Does it make sense to check for sorted-ness
	//            first (because we know that sorted-ness is
	//            very likely)?
	if orderedList := byPos(list); !sort.IsSorted(orderedList) {
		sort.Sort(orderedList)
	}
}

// A CommentMap maps an AST node to a list of comment groups
// associated with it. See NewCommentMap for a description of
// the association.
type CommentMap map[Node][]*CommentGroup

func (cmap CommentMap) addComment(n Node, c *CommentGroup) {
	list := cmap[n]
	if len(list) == 0 {
		list = []*CommentGroup{c}
	} else {
		list = append(list, c)
	}
	cmap[n] = list
}

type byInterval []Node

func (a byInterval) Len() int { return len(a) }
func (a byInterval) Less(i, j int) bool {
	pi, pj := a[i].Pos(), a[j].Pos()
	return pi < pj || pi == pj && a[i].End() > a[j].End()
}
func (a byInterval) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// nodeList returns the list of nodes of the AST n in source order.
func nodeList(n Node) []Node {
	var list []Node
	Inspect(n, func(n Node) bool {
		// don't collect comments
		switch n.(type) {
		case nil, *CommentGroup, *Comment:
			return false
		}
		list = append(list, n)
		return true
	})
	// Note: The current implementation assumes that Inspect traverses the
	//       AST in depth-first and thus _source_ order. If AST traversal
	//       does not follow source order, the sorting call below will be
	//       required.
	// sort.Sort(byInterval(list))
	return list
}

// A commentListReader helps iterating through a list of comment groups.
type commentListReader struct {
	fset     *token.FileSet
	list     []*CommentGroup
	index    int
	comment  *CommentGroup  // comment group at current index
	pos, end token.Position // source interval of comment group at current index
}

func (r *commentListReader) eol() bool {
	return r.index >= len(r.list)
}

func (r *commentListReader) next() {
	if !r.eol() {
		r.comment = r.list[r.index]
		r.pos = r.fset.Position(r.comment.Pos())
		r.end = r.fset.Position(r.comment.End())
		r.index++
	}
}

// A nodeStack keeps track of nested nodes.
// A node lower on the stack lexically contains the nodes higher on the stack.
type nodeStack []Node

// push pops all nodes that appear lexically before n
// and then pushes n on the stack.
func (s *nodeStack) push(n Node) {
	s.pop(n.Pos())
	*s = append((*s), n)
}

// pop pops all nodes that appear lexically before pos
// (i.e., whose lexical extent has ended before or at pos).
// It returns the last node popped.
func (s *nodeStack) pop(pos token.Pos) (top Node) {
	i := len(*s)
	for i > 0 && (*s)[i-1].End() <= pos {
		top = (*s)[i-1]
		i--
	}
	*s = (*s)[0:i]
	return top
}

// NewCommentMap creates a new comment map by associating comment groups
// of the comments list with the nodes of the AST specified by node.
//
// A comment group g is associated with a node n if:
//
//   - g starts on the same line as n ends
//   - g starts on the line immediately following n, and there is
//     at least one empty line after g and before the next node
//   - g starts before n and is not associated to the node before n
//     via the previous rules
//
// NewCommentMap tries to associate a comment group to the "largest"
// node possible: For instance, if the comment is a line comment
// trailing an assignment, the comment is associated with the entire
// assignment rather than just the last operand in the assignment.
func NewCommentMap(fset *token.FileSet, node Node, comments []*CommentGroup) CommentMap {
	if len(comments) == 0 {
		return nil // no comments to map
	}

	cmap := make(CommentMap)

	// set up comment reader r
	tmp := make([]*CommentGroup, len(comments))
	copy(tmp, comments) // don't change incoming comments
	sortComments(tmp)
	r := commentListReader{fset: fset, list: tmp} // !r.eol() because len(comments) > 0
	r.next()

	// create node list in lexical order
	nodes := nodeList(node)
	nodes = append(nodes, nil) // append sentinel

	// set up iteration variables
	var (
		p     Node           // previous node
		pend  token.Position // end of p
		pg    Node           // previous node group (enclosing nodes of "importance")
		pgend token.Position // end of pg
		stack nodeStack      // stack of node groups
	)

	for _, q := range nodes {
		var qpos token.Position
		if q != nil {
			qpos = fset.Position(q.Pos()) // current node position
		} else {
			// set fake sentinel position to infinity so that
			// all comments get processed before the sentinel
			const infinity = 1 << 30
			qpos.Offset = infinity
			qpos.Line = infinity
		}

		// process comments before current node
		for r.end.Offset <= qpos.Offset {
			// determine recent node group
			if top := stack.pop(r.comment.Pos()); top != nil {
				pg = top
				pgend = fset.Position(pg.End())
			}
			// Try to associate a comment first with a node group
			// (i.e., a node of "importance" such as a declaration);
			// if that fails, try to associate it with the most recent
			// node.
			// TODO(gri) try to simplify the logic below
			var assoc Node
			switch {
			case pg != nil &&
				(pgend.Line == r.pos.Line ||
					pgend.Line+1 == r.pos.Line && r.end.Line+1 < qpos.Line):
				// 1) comment starts on same line as previous node group ends, or
				// 2) comment starts on the line immediately after the
				//    previous node group and there is an empty line before
				//    the current node
				// => associate comment with previous node group
				assoc = pg
			case p != nil &&
				(pend.Line == r.pos.Line ||
					pend.Line+1 == r.pos.Line && r.end.Line+1 < qpos.Line ||
					q == nil):
				// same rules apply as above for p rather than pg,
				// but also associate with p if we are at the end (q == nil)
				assoc = p
			default:
				// otherwise, associate comment with current node
				if q == nil {
					// we can only reach here if there was no p
					// which would imply that there were no nodes
					panic("internal error: no comments should be associated with sentinel")
				}
				assoc = q
			}
			cmap.addComment(assoc, r.comment)
			if r.eol() {
				return cmap
			}
			r.next()
		}

		// update previous node
		p = q
		pend = fset.Position(p.End())

		// update previous node group if we see an "important" node
		switch q.(type) {
		case *File, *Field, Decl, Spec, Stmt:
			stack.push(q)
		}
	}

	return cmap
}

// Update replaces an old node in the comment map with the new node
// and returns the new node. Comments that were associated with the
// old node are associated with the new node.
func (cmap CommentMap) Update(old, new Node) Node {
	if list := cmap[old]; len(list) > 0 {
		delete(cmap, old)
		cmap[new] = append(cmap[new], list...)
	}
	return new
}

// Filter returns a new comment map consisting of only those
// entries of cmap for which a corresponding node exists in
// the AST specified by node.
func (cmap CommentMap) Filter(node Node) CommentMap {
	umap := make(CommentMap)
	Inspect(node, func(n Node) bool {
		if g := cmap[n]; len(g) > 0 {
			umap[n] = g
		}
		return true
	})
	return umap
}

// Comments returns the list of comment groups in the comment map.
// The result is sorted in source order.
func (cmap CommentMap) Comments() []*CommentGroup {
	list := make([]*CommentGroup, 0, len(cmap))
	for _, e := range cmap {
		list = append(list, e...)
	}
	sortComments(list)
	return list
}

func summary(list []*CommentGroup) string {
	const maxLen = 40
	var buf bytes.Buffer

	// collect comments text
loop:
	for _, group := range list {
		// Note: CommentGroup.Text() does too much work for what we
		//       need and would only replace this innermost loop.
		//       Just do it explicitly.
		for _, comment := range group.List {
			if buf.Len() >= maxLen {
				break loop
			}
			buf.WriteString(comment.Text)
		}
	}

	// truncate if too long
	if buf.Len() > maxLen {
		buf.Truncate(maxLen - 3)
		buf.WriteString("...")
	}

	// replace any invisibles with blanks
	bytes := buf.Bytes()
	for i, b := range bytes {
		switch b {
		case '\t', '\n', '\r':
			bytes[i] = ' '
		}
	}

	return string(bytes)
}

func (cmap CommentMap) String() string {
	// print map entries in sorted order
	var nodes []Node
	for node := range cmap {
		nodes = append(nodes, node)
	}
	sort.Sort(byInterval(nodes))

	var buf strings.Builder
	fmt.Fprintln(&buf, "CommentMap {")
	for _, node := range nodes {
		comment := cmap[node]
		// print name of identifiers; print node type for other nodes
		var s string
		if ident, ok := node.(*Ident); ok {
			s = ident.Name
		} else {
			s = fmt.Sprintf("%T", node)
		}
		fmt.Fprintf(&buf, "\t%p  %20s:  %s\n", node, s, summary(comment))
	}
	fmt.Fprintln(&buf, "}")
	return buf.String()
}