diff options
Diffstat (limited to 'plugins/snippets/snippets/parser.py')
-rw-r--r-- | plugins/snippets/snippets/parser.py | 256 |
1 files changed, 256 insertions, 0 deletions
diff --git a/plugins/snippets/snippets/parser.py b/plugins/snippets/snippets/parser.py new file mode 100644 index 0000000..2b6043d --- /dev/null +++ b/plugins/snippets/snippets/parser.py @@ -0,0 +1,256 @@ +# Gedit snippets plugin +# Copyright (C) 2006-2007 Jesse van den Kieboom <jesse@icecrew.nl> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import re + +class Token: + def __init__(self, klass, data): + self.klass = klass + self.data = data + + def __str__(self): + return '%s: [%s]' % (self.klass, self.data) + + def __eq__(self, other): + return self.klass == other.klass and self.data == other.data + + def __ne__(self, other): + return not self.__eq__(other) + +class Parser: + SREG_ENV = '[A-Z_]+' + SREG_ID = '[0-9]+' + + REG_ESCAPE = re.compile('(\\$(%s|\\(|\\{|<|%s)|`|\\\\)' % (SREG_ENV, SREG_ID)) + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + self.position = 0 + self.data_length = len(self.data) + + self.RULES = (self._match_env, self._match_regex, self._match_placeholder, self._match_shell, self._match_eval, self._text) + + def remains(self): + return self.data[self.position:] + + def next_char(self): + if self.position + 1 >= self.data_length: + return '' + else: + return self.data[self.position + 1] + + def char(self): + if self.position >= self.data_length: + return '' + else: + return self.data[self.position] + + def token(self): + self.tktext = '' + + while self.position < self.data_length: + try: + # Get first character + func = {'$': self._rule, + '`': self._try_match_shell}[self.char()] + except: + func = self._text + + # Detect end of text token + if func != self._text and self.tktext != '': + return Token('text', self.tktext) + + tk = func() + + if tk: + return tk + + if self.tktext != '': + return Token('text', self.tktext) + + def _need_escape(self): + text = self.remains()[1:] + + if text == '': + return False + + return self.REG_ESCAPE.match(text) + + def _escape(self): + if not self._need_escape(): + return + + # Increase position with 1 + self.position += 1 + + def _text(self): + if self.char() == '\\': + self._escape() + + self.tktext += self.char() + self.position += 1 + + def _rule(self): + for rule in self.RULES: + res = rule() + + if res: + return res + + def _match_env(self): + text = self.remains() + match = re.match('\\$(%s)' % self.SREG_ENV, text) or re.match('\\${(%s)}' % self.SREG_ENV, text) + + if match: + self.position += len(match.group(0)) + return Token('environment', match.group(1)) + + def _parse_list(self, lst): + pos = 0 + length = len(lst) + items = [] + last = None + + while pos < length: + char = lst[pos] + next = pos < length - 1 and lst[pos + 1] + + if char == '\\' and (next == ',' or next == ']'): + char = next + pos += 1 + elif char == ',': + if last != None: + items.append(last) + + last = None + pos += 1 + continue + + last = (last != None and last + char) or char + pos += 1 + + if last != None: + items.append(last) + + return items + + def _parse_default(self, default): + match = re.match('^\\s*(\\\\)?(\\[((\\\\]|[^\\]])+)\\]\\s*)$', default) + + if not match: + return [default] + + groups = match.groups() + + if groups[0]: + return [groups[1]] + + return self._parse_list(groups[2]) + + def _match_placeholder(self): + text = self.remains() + + match = re.match('\\${(%s)(:((\\\\\\}|[^}])+))?}' % self.SREG_ID, text) or re.match('\\$(%s)' % self.SREG_ID, text) + + if not match: + return None + + groups = match.groups() + default = '' + tabstop = int(groups[0]) + self.position += len(match.group(0)) + + if len(groups) > 1 and groups[2]: + default = self._parse_default(groups[2].replace('\\}', '}')) + + return Token('placeholder', {'tabstop': tabstop, 'default': default}) + + def _match_shell(self): + text = self.remains() + match = re.match('`((%s):)?((\\\\`|[^`])+?)`' % self.SREG_ID, text) or re.match('\\$\\(((%s):)?((\\\\\\)|[^\\)])+?)\\)' % self.SREG_ID, text) + + if not match: + return None + + groups = match.groups() + tabstop = (groups[1] and int(groups[1])) or -1 + self.position += len(match.group(0)) + + if text[0] == '`': + contents = groups[2].replace('\\`', '`') + else: + contents = groups[2].replace('\\)', ')') + + return Token('shell', {'tabstop': tabstop, 'contents': contents}) + + def _try_match_shell(self): + return self._match_shell() or self._text() + + def _eval_options(self, options): + reg = re.compile(self.SREG_ID) + tabstop = -1 + depend = [] + + options = options.split(':') + + for opt in options: + if reg.match(opt): + tabstop = int(opt) + else: + depend += self._parse_list(opt[1:-1]) + + return (tabstop, depend) + + def _match_eval(self): + text = self.remains() + + options = '((%s)|\\[([0-9, ]+)\\])' % self.SREG_ID + match = re.match('\\$<((%s:)*)((\\\\>|[^>])+?)>' % options, text) + + if not match: + return None + + groups = match.groups() + (tabstop, depend) = (groups[0] and self._eval_options(groups[0][:-1])) or (-1, []) + self.position += len(match.group(0)) + + return Token('eval', {'tabstop': tabstop, 'dependencies': depend, 'contents': groups[5].replace('\\>', '>')}) + + def _match_regex(self): + text = self.remains() + + content = '((?:\\\\[/]|\\\\}|[^/}])+)' + match = re.match('\\${(?:(%s):)?\\s*(%s|\\$([A-Z_]+))?[/]%s[/]%s(?:[/]([a-zA-Z]*))?}' % (self.SREG_ID, self.SREG_ID, content, content), text) + + if not match: + return None + + groups = match.groups() + tabstop = (groups[0] and int(groups[0])) or -1 + inp = (groups[2] or (groups[1] and int(groups[1]))) or '' + + pattern = re.sub('\\\\([/}])', '\\1', groups[3]) + substitution = re.sub('\\\\([/}])', '\\1', groups[4]) + modifiers = groups[5] or '' + + self.position += len(match.group(0)) + + return Token('regex', {'tabstop': tabstop, 'input': inp, 'pattern': pattern, 'substitution': substitution, 'modifiers': modifiers}) + +# ex:ts=4:et: |