summaryrefslogtreecommitdiffstats
path: root/plugins/externaltools/tools/linkparsing.py
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/externaltools/tools/linkparsing.py')
-rw-r--r--plugins/externaltools/tools/linkparsing.py252
1 files changed, 252 insertions, 0 deletions
diff --git a/plugins/externaltools/tools/linkparsing.py b/plugins/externaltools/tools/linkparsing.py
new file mode 100644
index 0000000..d9c09a5
--- /dev/null
+++ b/plugins/externaltools/tools/linkparsing.py
@@ -0,0 +1,252 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2009-2010 Per Arneng <per.arneng@anyplanet.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import re
+
+
class Link(object):
    """
    This class represents a file link from within a string given by the
    output of some software tool. A link contains a reference to a file, the
    line and column number within the file and the boundaries within the
    given output string that should be marked as a link.
    """

    def __init__(self, path, line_nr, col_nr, start, end):
        """
        path    -- the path of the file (that could be extracted)
        line_nr -- the line nr of the specified file (int or numeric string)
        col_nr  -- the col nr of the specified file (int or numeric string).
                   None is accepted and stored as 0.
        start   -- the index within the string that the link starts at
        end     -- the index within the string where the link ends at

        A ValueError is raised by int() if line_nr or col_nr is a string that
        is not numeric.
        """
        self.path = path
        self.line_nr = int(line_nr)
        # An optional regexp group that did not take part in a match yields
        # None; store 0 in that case instead of failing inside int().
        self.col_nr = int(col_nr) if col_nr is not None else 0
        self.start = start
        self.end = end

    def __repr__(self):
        return "%s[%s][%s](%s:%s)" % (self.path, self.line_nr, self.col_nr,
                                      self.start, self.end)
+
+
class LinkParser(object):
    """
    Parses a text using different parsing providers with the goal of finding
    one or more file links within the text. A typical example could be the
    output from a compiler that specifies an error in a specific file. The
    path of the file, the line nr and some more info is then returned so that
    it can be used to navigate from the error output into the specific file.

    The actual work of parsing the text is done by instances of classes that
    inherit from AbstractLinkParser or by regular expressions. To add a new
    parser just create a class that inherits from AbstractLinkParser and then
    register it in this class constructor using the method add_parser. If you
    want to add a regular expression then just call add_regexp in this class
    constructor and provide your regexp string as argument.
    """

    def __init__(self):
        # Providers are consulted in registration order by parse().
        self._providers = []
        self.add_regexp(REGEXP_STANDARD)
        self.add_regexp(REGEXP_PYTHON)
        self.add_regexp(REGEXP_VALAC)
        self.add_regexp(REGEXP_BASH)
        self.add_regexp(REGEXP_RUBY)
        self.add_regexp(REGEXP_PERL)
        self.add_regexp(REGEXP_MCS)

    def add_parser(self, parser):
        """
        Registers a parser provider.

        parser -- an object with a parse(text) method that returns a list
        """
        self._providers.append(parser)

    def add_regexp(self, regexp):
        """
        Adds a regular expression string that should match a link using
        re.MULTILINE and re.VERBOSE regexp. The area marked as a link should
        be captured by a group named lnk. The path of the link should be
        captured by a group named pth. The line number should be captured by
        a group named ln. The column number may optionally be captured by a
        group named col, declared after the other three. To read more about
        this look at the documentation for the RegexpLinkParser constructor.

        regexp -- the regular expression string
        """
        self.add_parser(RegexpLinkParser(regexp))

    def parse(self, text):
        """
        Parses the given text and returns a list of links that are parsed from
        the text. This method delegates to parser providers that can parse
        output from different kinds of formats. If no links are found then an
        empty list is returned. A ValueError is raised if text is None.

        text -- the text to scan for file links. 'text' can not be None.
        """
        if text is None:
            raise ValueError("text can not be None")

        links = []

        # Results are concatenated provider by provider; they are neither
        # sorted nor de-duplicated here.
        for provider in self._providers:
            links.extend(provider.parse(text))

        return links
+
+
class AbstractLinkParser(object):
    """The "abstract" base class for link parsers"""

    def parse(self, text):
        """
        This method should be implemented by subclasses. It takes a text as
        argument (never None) and then returns a list of Link objects. If no
        links are found then an empty list is expected. The Link class is
        defined in this module. If you do not override this method then a
        NotImplementedError will be raised.

        text -- the text to parse. This argument is never None.
        """
        raise NotImplementedError("need to implement a parse method")
+
+
class RegexpLinkParser(AbstractLinkParser):
    """
    A class that represents parsers that only use one single regular
    expression. It can be used by subclasses or by itself. See the constructor
    documentation for details about the rules surrounding the regexp.
    """

    def __init__(self, regex):
        """
        Creates a new RegexpLinkParser based on the given regular expression.
        The regular expression is multiline and verbose (see python docs on
        compilation flags). The regular expression should contain three named
        capturing groups 'lnk', 'pth' and 'ln' and may contain a fourth one
        named 'col'. 'lnk' represents the area which should be marked as a
        link in the text. 'pth' is the path that should be looked for, 'ln' is
        the line number in that file and 'col' is the column number.

        regex -- the regular expression string to compile
        """
        self.re = re.compile(regex, re.MULTILINE | re.VERBOSE)

    def parse(self, text):
        """
        Returns a list with one Link for every match of the regular
        expression in the given text. If nothing matches then an empty list
        is returned.

        text -- the text to parse
        """
        # Look the optional group up by name instead of by position so that
        # extra capturing groups or a different group order can neither hide
        # the column nor trigger a lookup of a group that does not exist.
        has_col = "col" in self.re.groupindex

        links = []
        for m in self.re.finditer(text):
            col_nr = m.group("col") if has_col else None
            if col_nr is None:
                # no col group, or it did not take part in this match
                col_nr = 0

            links.append(Link(m.group("pth"), m.group("ln"), col_nr,
                              m.start("lnk"), m.end("lnk")))

        return links
+
# gcc 'test.c:13: warning: ...'
# grep 'test.c:5:int main(...'
# javac 'Test.java:13: ...'
# ruby 'test.rb:5: ...'
# scalac 'Test.scala:5: ...'
# sbt (scala) '[error] test.scala:4: ...'
# 6g (go) 'test.go:9: ...'
#
# The path must contain at least one character so that a line that merely
# starts with ':<digits>:' is not turned into a link with an empty path.
REGEXP_STANDARD = r"""
^
(?:\[(?:error|warn)\]\ )?
(?P<lnk>
    (?P<pth> [^ \:\n]+ )
    \:
    (?P<ln> \d+)
    \:?
    (?P<col> \d+)?
)
\:"""
+
# python '  File "test.py", line 13'
# The link covers the quoted path up to and including the line number.
REGEXP_PYTHON = r"""
^\s\sFile\s
(?P<lnk>
    \"
    (?P<pth> [^\"]+ )
    \",\sline\s
    (?P<ln> \d+ )
)"""
+
# bash 'test.sh: line 5:'
REGEXP_BASH = r"""
^(?P<lnk>
    (?P<pth> .* )
    \:\sline\s
    (?P<ln> \d+ )
)\:"""
+
# valac 'Test.vala:13.1-13.3: ...'
# The location is '<line>.<col>-<line>.<col>'; the start line and the start
# column are captured, the end position is only matched.
REGEXP_VALAC = r"""
^(?P<lnk>
    (?P<pth>
        .*vala
    )
    \:
    (?P<ln>
        \d+
    )
    \.
    (?P<col>
        \d+
    )
    -\d+\.\d+
)\:"""
+
# ruby
# test.rb:5: ...
#       from test.rb:3:in `each'
# The first line is parsed by REGEXP_STANDARD; this expression handles the
# 'from' lines that follow it.
REGEXP_RUBY = r"""
^\s+from\s
(?P<lnk>
    (?P<pth>
        .*
    )
    \:
    (?P<ln>
        \d+
    )
)"""
+
# perl 'syntax error at test.pl line 88, near "$fake_var'
# perl 'Died at test.pl line 5, <STDIN> line 1.'
# The path is matched non-greedily so that a trailing ', <FH> line N' part
# is not mistaken for the script's own line number.
REGEXP_PERL = r"""
\sat\s
(?P<lnk>
    (?P<pth> .*? )
    \sline\s
    (?P<ln> \d+ )
)"""
+
# mcs (C#) 'Test.cs(12,7): error CS0103: The name `fakeMethod'
# fpc (Pascal) 'hello.pas(11,1) Fatal: Syntax error, ":" expected but "BEGIN"'
# The location is '(<line>,<col>)'; both numbers are captured.
REGEXP_MCS = r"""
^
(?P<lnk>
    (?P<pth> \S+ )
    \(
    (?P<ln> \d+ )
    ,
    (?P<col> \d+ )
    \)
)
\:?\s
"""
+
+# ex:ts=4:et: