diff options
Diffstat (limited to 'plugins/externaltools/tools/linkparsing.py')
-rw-r--r-- | plugins/externaltools/tools/linkparsing.py | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/plugins/externaltools/tools/linkparsing.py b/plugins/externaltools/tools/linkparsing.py new file mode 100644 index 0000000..d9c09a5 --- /dev/null +++ b/plugins/externaltools/tools/linkparsing.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2009-2010 Per Arneng <per.arneng@anyplanet.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import re + + +class Link: + """ + This class represents a file link from within a string given by the + output of some software tool. A link contains a reference to a file, the + line number within the file and the boundaries within the given output + string that should be marked as a link. + """ + + def __init__(self, path, line_nr, col_nr, start, end): + """ + path -- the path of the file (that could be extracted) + line_nr -- the line nr of the specified file + col_nr -- the col nr of the specific file + start -- the index within the string that the link starts at + end -- the index within the string where the link ends at + """ + self.path = path + self.line_nr = int(line_nr) + self.col_nr = int(col_nr) + self.start = start + self.end = end + + def __repr__(self): + return "%s[%s][%s](%s:%s)" % (self.path, self.line_nr, self.col_nr, + self.start, self.end) + + +class LinkParser: + """ + Parses a text using different parsing providers with the goal of finding one + or more file links within the text. A typical example could be the output + from a compiler that specifies an error in a specific file. The path of the + file, the line nr and some more info is then returned so that it can be used + to be able to navigate from the error output in to the specific file. + + The actual work of parsing the text is done by instances of classes that + inherits from AbstractLinkParser or by regular expressions. To add a new + parser just create a class that inherits from AbstractLinkParser and then + register in this class cunstructor using the method add_parser. If you want + to add a regular expression then just call add_regexp in this class + constructor and provide your regexp string as argument. + """ + + def __init__(self): + self._providers = [] + self.add_regexp(REGEXP_STANDARD) + self.add_regexp(REGEXP_PYTHON) + self.add_regexp(REGEXP_VALAC) + self.add_regexp(REGEXP_BASH) + self.add_regexp(REGEXP_RUBY) + self.add_regexp(REGEXP_PERL) + self.add_regexp(REGEXP_MCS) + + def add_parser(self, parser): + self._providers.append(parser) + + def add_regexp(self, regexp): + """ + Adds a regular expression string that should match a link using + re.MULTILINE and re.VERBOSE regexp. The area marked as a link should + be captured by a group named lnk. The path of the link should be + captured by a group named pth. The line number should be captured by + a group named ln. To read more about this look at the documentation + for the RegexpLinkParser constructor. + """ + self.add_parser(RegexpLinkParser(regexp)) + + def parse(self, text): + """ + Parses the given text and returns a list of links that are parsed from + the text. This method delegates to parser providers that can parse + output from different kinds of formats. If no links are found then an + empty list is returned. + + text -- the text to scan for file links. 'text' can not be None. + """ + if text is None: + raise ValueError("text can not be None") + + links = [] + + for provider in self._providers: + links.extend(provider.parse(text)) + + return links + + +class AbstractLinkParser(object): + """The "abstract" base class for link parses""" + + def parse(self, text): + """ + This method should be implemented by subclasses. It takes a text as + argument (never None) and then returns a list of Link objects. If no + links are found then an empty list is expected. The Link class is + defined in this module. If you do not override this method then a + NotImplementedError will be thrown. + + text -- the text to parse. This argument is never None. + """ + raise NotImplementedError("need to implement a parse method") + + +class RegexpLinkParser(AbstractLinkParser): + """ + A class that represents parsers that only use one single regular expression. + It can be used by subclasses or by itself. See the constructor documentation + for details about the rules surrouning the regexp. + """ + + def __init__(self, regex): + """ + Creates a new RegexpLinkParser based on the given regular expression. + The regular expression is multiline and verbose (se python docs on + compilation flags). The regular expression should contain three named + capturing groups 'lnk', 'pth' and 'ln'. 'lnk' represents the area wich + should be marked as a link in the text. 'pth' is the path that should + be looked for and 'ln' is the line number in that file. + """ + self.re = re.compile(regex, re.MULTILINE | re.VERBOSE) + + def parse(self, text): + links = [] + for m in re.finditer(self.re, text): + groups = m.groups() + + path = m.group("pth") + line_nr = m.group("ln") + start = m.start("lnk") + end = m.end("lnk") + + # some regexes may have a col group + if len(groups) > 3 and groups[3] != None: + col_nr = m.group("col") + else: + col_nr = 0 + + link = Link(path, line_nr, col_nr, start, end) + links.append(link) + + return links + +# gcc 'test.c:13: warning: ...' +# grep 'test.c:5:int main(...' +# javac 'Test.java:13: ...' +# ruby 'test.rb:5: ...' +# scalac 'Test.scala:5: ...' +# sbt (scala) '[error] test.scala:4: ...' +# 6g (go) 'test.go:9: ...' +REGEXP_STANDARD = r""" +^ +(?:\[(?:error|warn)\]\ )? +(?P<lnk> + (?P<pth> [^ \:\n]* ) + \: + (?P<ln> \d+) + \:? + (?P<col> \d+)? +) +\:""" + +# python ' File "test.py", line 13' +REGEXP_PYTHON = r""" +^\s\sFile\s +(?P<lnk> + \" + (?P<pth> [^\"]+ ) + \",\sline\s + (?P<ln> \d+ ) +)""" + +# python 'test.sh: line 5:' +REGEXP_BASH = r""" +^(?P<lnk> + (?P<pth> .* ) + \:\sline\s + (?P<ln> \d+ ) +)\:""" + +# valac 'Test.vala:13.1-13.3: ...' +REGEXP_VALAC = r""" +^(?P<lnk> + (?P<pth> + .*vala + ) + \: + (?P<ln> + \d+ + ) + \.\d+-\d+\.\d+ + )\: """ + +#ruby +#test.rb:5: ... +# from test.rb:3:in `each' +# fist line parsed by REGEXP_STANDARD +REGEXP_RUBY = r""" +^\s+from\s +(?P<lnk> + (?P<pth> + .* + ) + \: + (?P<ln> + \d+ + ) + )""" + +# perl 'syntax error at test.pl line 88, near "$fake_var' +REGEXP_PERL = r""" +\sat\s +(?P<lnk> + (?P<pth> .* ) + \sline\s + (?P<ln> \d+ ) +)""" + +# mcs (C#) 'Test.cs(12,7): error CS0103: The name `fakeMethod' +# fpc (Pascal) 'hello.pas(11,1) Fatal: Syntax error, ":" expected but "BEGIN"' +REGEXP_MCS = r""" +^ +(?P<lnk> + (?P<pth> \S+ ) + \( + (?P<ln> \d+ ) + ,\d+\) +) +\:?\s +""" + +# ex:ts=4:et: |