# -*- coding: utf-8 -*-
#
#    Copyright (C) 2009-2010  Per Arneng
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

import re


class Link:
    """
    This class represents a file link from within a string given by the
    output of some software tool. A link contains a reference to a file, the
    line number within the file and the boundaries within the given output
    string that should be marked as a link.
    """

    def __init__(self, path, line_nr, col_nr, start, end):
        """
        path -- the path of the file (that could be extracted)
        line_nr -- the line nr of the specified file (converted with int())
        col_nr -- the col nr of the specified file (converted with int())
        start -- the index within the string that the link starts at
        end -- the index within the string where the link ends at
        """
        self.path = path
        self.line_nr = int(line_nr)
        self.col_nr = int(col_nr)
        self.start = start
        self.end = end

    def __repr__(self):
        return "%s[%s][%s](%s:%s)" % (self.path, self.line_nr, self.col_nr,
                                      self.start, self.end)


class LinkParser:
    """
    Parses a text using different parsing providers with the goal of finding
    one or more file links within the text. A typical example could be the
    output from a compiler that specifies an error in a specific file. The
    path of the file, the line nr and some more info is then returned so that
    it can be used to be able to navigate from the error output in to the
    specific file.

    The actual work of parsing the text is done by instances of classes that
    inherits from AbstractLinkParser or by regular expressions. To add a new
    parser just create a class that inherits from AbstractLinkParser and then
    register in this class constructor using the method add_parser. If you
    want to add a regular expression then just call add_regexp in this class
    constructor and provide your regexp string as argument.
    """

    def __init__(self):
        self._providers = []
        self.add_regexp(REGEXP_STANDARD)
        self.add_regexp(REGEXP_PYTHON)
        self.add_regexp(REGEXP_VALAC)
        self.add_regexp(REGEXP_BASH)
        self.add_regexp(REGEXP_RUBY)
        self.add_regexp(REGEXP_PERL)
        self.add_regexp(REGEXP_MCS)

    def add_parser(self, parser):
        """
        Registers a parser provider. The provider must offer a parse(text)
        method that returns a list of Link objects.
        """
        self._providers.append(parser)

    def add_regexp(self, regexp):
        """
        Adds a regular expression string that should match a link using
        re.MULTILINE and re.VERBOSE regexp. The area marked as a link should
        be captured by a group named lnk. The path of the link should be
        captured by a group named pth. The line number should be captured by
        a group named ln. To read more about this look at the documentation
        for the RegexpLinkParser constructor.
        """
        self.add_parser(RegexpLinkParser(regexp))

    def parse(self, text):
        """
        Parses the given text and returns a list of links that are parsed from
        the text. This method delegates to parser providers that can parse
        output from different kinds of formats. If no links are found then an
        empty list is returned. The links are listed provider by provider, in
        the order the providers were registered.

        text -- the text to scan for file links. 'text' can not be None.

        Raises ValueError if 'text' is None.
        """
        if text is None:
            raise ValueError("text can not be None")

        links = []

        for provider in self._providers:
            links.extend(provider.parse(text))

        return links


class AbstractLinkParser(object):
    """The "abstract" base class for link parsers"""

    def parse(self, text):
        """
        This method should be implemented by subclasses. It takes a text as
        argument (never None) and then returns a list of Link objects. If no
        links are found then an empty list is expected. The Link class is
        defined in this module. If you do not override this method then a
        NotImplementedError will be thrown.

        text -- the text to parse. This argument is never None.
        """
        raise NotImplementedError("need to implement a parse method")


class RegexpLinkParser(AbstractLinkParser):
    """
    A class that represents parsers that only use one single regular
    expression. It can be used by subclasses or by itself. See the
    constructor documentation for details about the rules surrounding the
    regexp.
    """

    def __init__(self, regex):
        """
        Creates a new RegexpLinkParser based on the given regular expression.
        The regular expression is multiline and verbose (see python docs on
        compilation flags). The regular expression should contain three named
        capturing groups 'lnk', 'pth' and 'ln'. 'lnk' represents the area
        which should be marked as a link in the text. 'pth' is the path that
        should be looked for and 'ln' is the line number in that file. An
        optional fourth named group 'col' may capture the column number.
        """
        self.re = re.compile(regex, re.MULTILINE | re.VERBOSE)

    def parse(self, text):
        """
        Returns a list with one Link for every match of the regular
        expression in the text, in the order the matches occur.

        text -- the text to parse. This argument is never None.
        """
        links = []
        for m in self.re.finditer(text):
            # Look the column up by name instead of by position, so that a
            # regexp with additional groups but no 'col' group still works.
            # The column is 0 when the group is absent or did not take part
            # in the match.
            col_nr = m.groupdict().get("col")
            if col_nr is None:
                col_nr = 0
            links.append(Link(m.group("pth"), m.group("ln"), col_nr,
                              m.start("lnk"), m.end("lnk")))

        return links


# gcc 'test.c:13: warning: ...'
# grep 'test.c:5:int main(...'
# javac 'Test.java:13: ...'
# ruby 'test.rb:5: ...'
# scalac 'Test.scala:5: ...'
# sbt (scala) '[error] test.scala:4: ...'
# 6g (go) 'test.go:9: ...'
REGEXP_STANDARD = r"""
^
(?:\[(?:error|warn)\]\ )?
(?P<lnk>
    (?P<pth> [^ \:\n]* )
    \:
    (?P<ln> \d+)
    \:?
    (?P<col> \d+)?
)
\:"""

# python '  File "test.py", line 13'
REGEXP_PYTHON = r"""
^\s\sFile\s
(?P<lnk>
    \"
    (?P<pth> [^\"]+ )
    \",\sline\s
    (?P<ln> \d+ )
)"""

# bash 'test.sh: line 5:'
REGEXP_BASH = r"""
^(?P<lnk>
    (?P<pth> .* )
    \:\sline\s
    (?P<ln> \d+ )
)\:"""

# valac 'Test.vala:13.1-13.3: ...'
REGEXP_VALAC = r"""
^(?P<lnk>
    (?P<pth> .*vala )
    \:
    (?P<ln> \d+ )
    \.\d+-\d+\.\d+
)\: """

# ruby
# test.rb:5: ...
#    from test.rb:3:in `each'
# first line parsed by REGEXP_STANDARD
REGEXP_RUBY = r"""
^\s+from\s
(?P<lnk>
    (?P<pth> .* )
    \:
    (?P<ln> \d+ )
)"""

# perl 'syntax error at test.pl line 88, near "$fake_var'
REGEXP_PERL = r"""
\sat\s
(?P<lnk>
    (?P<pth> .* )
    \sline\s
    (?P<ln> \d+ )
)"""

# mcs (C#) 'Test.cs(12,7): error CS0103: The name `fakeMethod'
# fpc (Pascal) 'hello.pas(11,1) Fatal: Syntax error, ":" expected but "BEGIN"'
REGEXP_MCS = r"""
^
(?P<lnk>
    (?P<pth> \S+ )
    \(
    (?P<ln> \d+ )
    ,\d+\)
)
\:?\s
"""

# ex:ts=4:et: