1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
|
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2010 Per Arneng <per.arneng@anyplanet.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
import re
class Link:
"""
This class represents a file link from within a string given by the
output of some software tool. A link contains a reference to a file, the
line number within the file and the boundaries within the given output
string that should be marked as a link.
"""
def __init__(self, path, line_nr, col_nr, start, end):
"""
path -- the path of the file (that could be extracted)
line_nr -- the line nr of the specified file
col_nr -- the col nr of the specific file
start -- the index within the string that the link starts at
end -- the index within the string where the link ends at
"""
self.path = path
self.line_nr = int(line_nr)
self.col_nr = int(col_nr)
self.start = start
self.end = end
def __repr__(self):
return "%s[%s][%s](%s:%s)" % (self.path, self.line_nr, self.col_nr,
self.start, self.end)
class LinkParser:
"""
Parses a text using different parsing providers with the goal of finding one
or more file links within the text. A typical example could be the output
from a compiler that specifies an error in a specific file. The path of the
file, the line nr and some more info is then returned so that it can be used
to be able to navigate from the error output in to the specific file.
The actual work of parsing the text is done by instances of classes that
inherits from AbstractLinkParser or by regular expressions. To add a new
parser just create a class that inherits from AbstractLinkParser and then
register in this class cunstructor using the method add_parser. If you want
to add a regular expression then just call add_regexp in this class
constructor and provide your regexp string as argument.
"""
def __init__(self):
self._providers = []
self.add_regexp(REGEXP_STANDARD)
self.add_regexp(REGEXP_PYTHON)
self.add_regexp(REGEXP_VALAC)
self.add_regexp(REGEXP_BASH)
self.add_regexp(REGEXP_RUBY)
self.add_regexp(REGEXP_PERL)
self.add_regexp(REGEXP_MCS)
def add_parser(self, parser):
self._providers.append(parser)
def add_regexp(self, regexp):
"""
Adds a regular expression string that should match a link using
re.MULTILINE and re.VERBOSE regexp. The area marked as a link should
be captured by a group named lnk. The path of the link should be
captured by a group named pth. The line number should be captured by
a group named ln. To read more about this look at the documentation
for the RegexpLinkParser constructor.
"""
self.add_parser(RegexpLinkParser(regexp))
def parse(self, text):
"""
Parses the given text and returns a list of links that are parsed from
the text. This method delegates to parser providers that can parse
output from different kinds of formats. If no links are found then an
empty list is returned.
text -- the text to scan for file links. 'text' can not be None.
"""
if text is None:
raise ValueError("text can not be None")
links = []
for provider in self._providers:
links.extend(provider.parse(text))
return links
class AbstractLinkParser(object):
"""The "abstract" base class for link parses"""
def parse(self, text):
"""
This method should be implemented by subclasses. It takes a text as
argument (never None) and then returns a list of Link objects. If no
links are found then an empty list is expected. The Link class is
defined in this module. If you do not override this method then a
NotImplementedError will be thrown.
text -- the text to parse. This argument is never None.
"""
raise NotImplementedError("need to implement a parse method")
class RegexpLinkParser(AbstractLinkParser):
"""
A class that represents parsers that only use one single regular expression.
It can be used by subclasses or by itself. See the constructor documentation
for details about the rules surrouning the regexp.
"""
def __init__(self, regex):
"""
Creates a new RegexpLinkParser based on the given regular expression.
The regular expression is multiline and verbose (se python docs on
compilation flags). The regular expression should contain three named
capturing groups 'lnk', 'pth' and 'ln'. 'lnk' represents the area wich
should be marked as a link in the text. 'pth' is the path that should
be looked for and 'ln' is the line number in that file.
"""
self.re = re.compile(regex, re.MULTILINE | re.VERBOSE)
def parse(self, text):
links = []
for m in re.finditer(self.re, text):
groups = m.groups()
path = m.group("pth")
line_nr = m.group("ln")
start = m.start("lnk")
end = m.end("lnk")
# some regexes may have a col group
if len(groups) > 3 and groups[3] != None:
col_nr = m.group("col")
else:
col_nr = 0
link = Link(path, line_nr, col_nr, start, end)
links.append(link)
return links
# gcc 'test.c:13: warning: ...'
# grep 'test.c:5:int main(...'
# javac 'Test.java:13: ...'
# ruby 'test.rb:5: ...'
# scalac 'Test.scala:5: ...'
# sbt (scala) '[error] test.scala:4: ...'
# 6g (go) 'test.go:9: ...'
REGEXP_STANDARD = r"""
^
(?:\[(?:error|warn)\]\ )?
(?P<lnk>
(?P<pth> [^ \:\n]* )
\:
(?P<ln> \d+)
\:?
(?P<col> \d+)?
)
\:"""
# python ' File "test.py", line 13'
REGEXP_PYTHON = r"""
^\s\sFile\s
(?P<lnk>
\"
(?P<pth> [^\"]+ )
\",\sline\s
(?P<ln> \d+ )
)"""
# python 'test.sh: line 5:'
REGEXP_BASH = r"""
^(?P<lnk>
(?P<pth> .* )
\:\sline\s
(?P<ln> \d+ )
)\:"""
# valac 'Test.vala:13.1-13.3: ...'
REGEXP_VALAC = r"""
^(?P<lnk>
(?P<pth>
.*vala
)
\:
(?P<ln>
\d+
)
\.\d+-\d+\.\d+
)\: """
#ruby
#test.rb:5: ...
# from test.rb:3:in `each'
# fist line parsed by REGEXP_STANDARD
REGEXP_RUBY = r"""
^\s+from\s
(?P<lnk>
(?P<pth>
.*
)
\:
(?P<ln>
\d+
)
)"""
# perl 'syntax error at test.pl line 88, near "$fake_var'
REGEXP_PERL = r"""
\sat\s
(?P<lnk>
(?P<pth> .* )
\sline\s
(?P<ln> \d+ )
)"""
# mcs (C#) 'Test.cs(12,7): error CS0103: The name `fakeMethod'
# fpc (Pascal) 'hello.pas(11,1) Fatal: Syntax error, ":" expected but "BEGIN"'
REGEXP_MCS = r"""
^
(?P<lnk>
(?P<pth> \S+ )
\(
(?P<ln> \d+ )
,\d+\)
)
\:?\s
"""
# ex:ts=4:et:
|