summaryrefslogtreecommitdiffstats
path: root/third_party/python/Mako/mako/lexer.py
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/Mako/mako/lexer.py')
-rw-r--r--third_party/python/Mako/mako/lexer.py490
1 files changed, 490 insertions, 0 deletions
diff --git a/third_party/python/Mako/mako/lexer.py b/third_party/python/Mako/mako/lexer.py
new file mode 100644
index 0000000000..a02b57f8a1
--- /dev/null
+++ b/third_party/python/Mako/mako/lexer.py
@@ -0,0 +1,490 @@
+# mako/lexer.py
+# Copyright 2006-2020 the Mako authors and contributors <see AUTHORS file>
+#
+# This module is part of Mako and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+"""provides the Lexer class for parsing template strings into parse trees."""
+
+import codecs
+import re
+
+from mako import compat
+from mako import exceptions
+from mako import parsetree
+from mako.pygen import adjust_whitespace
+
+_regexp_cache = {}  # module-wide cache of compiled patterns, keyed by the (regexp, flags) pair passed to Lexer.match()
+
+
class Lexer(object):
    """Turn Mako template source into a ``parsetree.TemplateNode`` tree.

    The lexer keeps a cursor (``match_position``) into ``self.text`` and
    repeatedly tries a fixed sequence of ``match_*`` methods at that cursor
    (see :meth:`parse`).  Each successful matcher appends one or more
    parsetree nodes via :meth:`append_node`.

    State kept on the instance:

    * ``text`` / ``filename`` -- the template source and its name.
    * ``template`` -- the root ``parsetree.TemplateNode`` being built.
    * ``match_position`` -- index in ``text`` where the next match starts.
    * ``lineno`` -- line number at ``match_position``.
    * ``matched_lineno`` / ``matched_charpos`` -- line and 1-based column
      at which the most recent successful match started; used for node
      positions and error reporting.
    * ``tag`` -- stack of currently open ``parsetree.Tag`` nodes.
    * ``control_line`` -- stack of open primary control lines.
    * ``ternary_stack`` -- parallel to ``control_line``; each entry lists
      the ternary control lines seen for that frame.
    """

    def __init__(
        self,
        text,
        filename=None,
        disable_unicode=False,
        input_encoding=None,
        preprocessor=None,
    ):
        """Set up lexer state for *text*.

        :param text: template source (text or raw bytes).
        :param filename: name stored on nodes and in error messages.
        :param disable_unicode: when true the source is not decoded by
         :meth:`parse`; rejected when ``compat.py3k`` is set.
        :param input_encoding: fallback encoding used when the source has
         no magic encoding comment.
        :param preprocessor: a callable, or an iterable of callables, each
         applied to the source text in :meth:`parse` before lexing.
        :raises exceptions.UnsupportedError: if ``disable_unicode`` is
         requested while ``compat.py3k`` is true.
        """
        self.text = text
        self.filename = filename
        self.template = parsetree.TemplateNode(self.filename)
        self.matched_lineno = 1
        self.matched_charpos = 0
        self.lineno = 1
        self.match_position = 0
        self.tag = []
        self.control_line = []
        self.ternary_stack = []
        self.disable_unicode = disable_unicode
        self.encoding = input_encoding

        if compat.py3k and disable_unicode:
            raise exceptions.UnsupportedError(
                "Mako for Python 3 does not " "support disabling Unicode"
            )

        # normalise ``preprocessor`` to something iterable
        if preprocessor is None:
            self.preprocessor = []
        elif not hasattr(preprocessor, "__iter__"):
            self.preprocessor = [preprocessor]
        else:
            self.preprocessor = preprocessor

    @property
    def exception_kwargs(self):
        """Keyword arguments describing the last matched position.

        Returns a dict with ``source``, ``lineno``, ``pos`` and
        ``filename`` keys, suitable for ``**``-expansion into the
        exception constructors used throughout this class.
        """
        return {
            "source": self.text,
            "lineno": self.matched_lineno,
            "pos": self.matched_charpos,
            "filename": self.filename,
        }

    def match(self, regexp, flags=None):
        """compile the given regexp, cache the reg, and call match_reg().

        :param regexp: pattern source string.
        :param flags: optional ``re`` flags; part of the cache key.
        :return: whatever :meth:`match_reg` returns for the compiled
         pattern.
        """
        cache_key = (regexp, flags)
        try:
            reg = _regexp_cache[cache_key]
        except KeyError:
            reg = re.compile(regexp, flags) if flags else re.compile(regexp)
            _regexp_cache[cache_key] = reg

        return self.match_reg(reg)

    def match_reg(self, reg):
        """match the given regular expression object to the current text
        position.

        if a match occurs, update the current text and line position.

        On success ``match_position`` moves to the end of the match (one
        past it for a zero-width match, so the cursor still advances),
        ``matched_lineno`` / ``matched_charpos`` record where the match
        started, and ``lineno`` is advanced by the number of newlines
        consumed.  On failure no state is changed.

        :param reg: a compiled regular expression.
        :return: the match object, or ``None``.
        """
        mp = self.match_position

        match = reg.match(self.text, mp)
        if match:
            (start, end) = match.span()
            self.match_position = end + 1 if end == start else end
            self.matched_lineno = self.lineno

            # Column of the match start: distance from the closest newline
            # before ``mp`` (index -1 when there is none).  ``len(self.text)``
            # is used rather than ``self.textlength`` because the latter is
            # only assigned inside parse(); a single rfind() replaces the
            # former character-by-character backwards scan.
            cp = mp - 1
            if 0 <= cp < len(self.text):
                cp = self.text.rfind("\n", 0, cp + 1)
            self.matched_charpos = mp - cp

            self.lineno += self.text[mp : self.match_position].count("\n")
        return match

    def parse_until_text(self, watch_nesting, *text):
        """Consume source up to one of the given terminators.

        Python comments (``#`` to end of line) and quoted string literals
        are skipped whole, so a terminator appearing inside them is
        ignored.

        :param watch_nesting: when true, a terminator is only accepted
         while the running counts of ``{}``, ``()`` and ``[]`` seen so far
         are all balanced (none greater than zero).
        :param text: one or more regular-expression fragments, any of
         which ends the scan.
        :return: ``(consumed, terminator)`` -- the source between the
         starting position and the terminator, and the terminator text.
        :raises exceptions.SyntaxException: if no terminator is found.
        """
        startpos = self.match_position
        text_re = r"|".join(text)
        brace_level = 0
        paren_level = 0
        bracket_level = 0
        while True:
            # skip a comment through its newline
            match = self.match(r"#.*\n")
            if match:
                continue
            # skip a complete single- or triple-quoted string literal
            match = self.match(
                r"(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1", re.S
            )
            if match:
                continue
            match = self.match(r"(%s)" % text_re)
            if match and not (
                watch_nesting
                and (brace_level > 0 or paren_level > 0 or bracket_level > 0)
            ):
                return (
                    self.text[
                        startpos : self.match_position - len(match.group(1))
                    ],
                    match.group(1),
                )
            elif not match:
                # plain code: take everything up to the next quote,
                # comment or terminator
                match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
            if match:
                # Reached either with a chunk of plain code or with a
                # terminator that was rejected because of open nesting; in
                # both cases fold its brackets into the running counts.
                chunk = match.group(1)
                brace_level += chunk.count("{")
                brace_level -= chunk.count("}")
                paren_level += chunk.count("(")
                paren_level -= chunk.count(")")
                bracket_level += chunk.count("[")
                bracket_level -= chunk.count("]")
                continue
            raise exceptions.SyntaxException(
                "Expected: %s" % ",".join(text), **self.exception_kwargs
            )

    def append_node(self, nodecls, *args, **kwargs):
        """Instantiate *nodecls* and attach the node to the parse tree.

        ``source``, ``lineno`` and ``pos`` default to the last matched
        position unless supplied by the caller; ``filename`` is always
        set from the lexer.  The node is appended to the innermost open
        tag, or to the template root when no tag is open, and the tag /
        control-line stacks are updated according to the node type.

        :raises exceptions.SyntaxException: for a non-primary control
         line that is not a legal ternary of the enclosing control line.
        """
        kwargs.setdefault("source", self.text)
        kwargs.setdefault("lineno", self.matched_lineno)
        kwargs.setdefault("pos", self.matched_charpos)
        kwargs["filename"] = self.filename
        node = nodecls(*args, **kwargs)
        if self.tag:
            self.tag[-1].nodes.append(node)
        else:
            self.template.nodes.append(node)
        # build a set of child nodes for the control line
        # (used for loop variable detection)
        # also build a set of child nodes on ternary control lines
        # (used for determining if a pass needs to be auto-inserted
        if self.control_line:
            control_frame = self.control_line[-1]
            control_frame.nodes.append(node)
            if not (
                isinstance(node, parsetree.ControlLine)
                and control_frame.is_ternary(node.keyword)
            ):
                if self.ternary_stack and self.ternary_stack[-1]:
                    self.ternary_stack[-1][-1].nodes.append(node)
        if isinstance(node, parsetree.Tag):
            if self.tag:
                node.parent = self.tag[-1]
            self.tag.append(node)
        elif isinstance(node, parsetree.ControlLine):
            if node.isend:
                # closing line: drop the frame it terminates
                self.control_line.pop()
                self.ternary_stack.pop()
            elif node.is_primary:
                # opening line: start a new frame
                self.control_line.append(node)
                self.ternary_stack.append([])
            elif self.control_line and self.control_line[-1].is_ternary(
                node.keyword
            ):
                self.ternary_stack[-1].append(node)
            elif self.control_line and not self.control_line[-1].is_ternary(
                node.keyword
            ):
                raise exceptions.SyntaxException(
                    "Keyword '%s' not a legal ternary for keyword '%s'"
                    % (node.keyword, self.control_line[-1].keyword),
                    **self.exception_kwargs
                )

    # magic encoding comment, e.g. "# -*- coding: utf-8 -*-"
    _coding_re = re.compile(r"#.*coding[:=]\s*([-\w.]+).*\r?\n")

    def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
        """given string/unicode or bytes/string, determine encoding
        from magic encoding comment, return body as unicode
        or raw if decode_raw=False

        The encoding is taken, in order of preference, from a UTF-8 BOM
        (bytes input only), the magic encoding comment at the start of
        the text, *known_encoding*, and finally ``"ascii"``.

        :param text: template source, text or bytes.
        :param decode_raw: when true, bytes input is decoded with the
         detected encoding before being returned.
        :param known_encoding: fallback encoding name, may be ``None``.
        :param filename: used only in error reporting.
        :return: ``(encoding, text)``.
        :raises exceptions.CompileException: if a UTF-8 BOM is combined
         with a magic comment naming anything other than ``utf-8``, or if
         decoding fails.
        """
        if isinstance(text, compat.text_type):
            # already text: only report the encoding, nothing to decode
            m = self._coding_re.match(text)
            if m:
                encoding = m.group(1)
            else:
                encoding = known_encoding or "ascii"
            return encoding, text

        if text.startswith(codecs.BOM_UTF8):
            text = text[len(codecs.BOM_UTF8) :]
            parsed_encoding = "utf-8"
            m = self._coding_re.match(text.decode("utf-8", "ignore"))
            if m is not None and m.group(1) != "utf-8":
                raise exceptions.CompileException(
                    "Found utf-8 BOM in file, with conflicting "
                    "magic encoding comment of '%s'" % m.group(1),
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                )
        else:
            m = self._coding_re.match(text.decode("utf-8", "ignore"))
            if m:
                parsed_encoding = m.group(1)
            else:
                parsed_encoding = known_encoding or "ascii"

        if decode_raw:
            try:
                text = text.decode(parsed_encoding)
            except UnicodeDecodeError:
                raise exceptions.CompileException(
                    "Unicode decode operation of encoding '%s' failed"
                    % parsed_encoding,
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                )

        return parsed_encoding, text

    def parse(self):
        """Lex the whole template and return the root node.

        Decodes the source (unless ``disable_unicode`` is set), runs the
        preprocessors, skips a leading magic encoding comment, then
        applies the ``match_*`` methods in a fixed order until the end of
        the text is reached.

        :return: the populated ``parsetree.TemplateNode``.
        :raises exceptions.SyntaxException: if a tag or control line is
         still open at the end of the template.
        """
        self.encoding, self.text = self.decode_raw_stream(
            self.text, not self.disable_unicode, self.encoding, self.filename
        )

        for preproc in self.preprocessor:
            self.text = preproc(self.text)

        # push the match marker past the
        # encoding comment.
        self.match_reg(self._coding_re)

        self.textlength = len(self.text)

        while True:
            if self.match_position > self.textlength:
                break

            if self.match_end():
                break
            if self.match_expression():
                continue
            if self.match_control_line():
                continue
            if self.match_comment():
                continue
            if self.match_tag_start():
                continue
            if self.match_tag_end():
                continue
            if self.match_python_block():
                continue
            if self.match_text():
                continue

            if self.match_position > self.textlength:
                break
            # No matcher accepted the input.  The position arguments are
            # passed here exactly as every other CompileException raised
            # in this class passes them (message, source, lineno, pos,
            # filename) instead of the message alone.
            raise exceptions.CompileException(
                "assertion failed",
                self.text,
                self.matched_lineno,
                self.matched_charpos,
                self.filename,
            )

        if self.tag:
            raise exceptions.SyntaxException(
                "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                **self.exception_kwargs
            )
        if self.control_line:
            raise exceptions.SyntaxException(
                "Unterminated control keyword: '%s'"
                % self.control_line[-1].keyword,
                self.text,
                self.control_line[-1].lineno,
                self.control_line[-1].pos,
                self.filename,
            )
        return self.template

    def match_tag_start(self):
        """Match an opening ``<%keyword attr="value" ...>`` tag.

        Appends a ``parsetree.Tag`` node carrying the parsed attributes.
        A self-closing tag (``/>``) is popped from the tag stack
        immediately.  For ``<%text>`` the body up to ``</%text>`` is taken
        verbatim as a single ``parsetree.Text`` node and the closing tag
        is consumed as well.

        :return: true if a tag was consumed, ``False`` otherwise.
        :raises exceptions.SyntaxException: if ``<%text>`` is not closed.
        """
        # Each quoted alternative excludes its own delimiter, matching the
        # attribute-extraction pattern used below.  The earlier ".*?" form
        # (with re.S) let one quoted section run across other quotes, which
        # is ambiguous and can backtrack catastrophically on tags that
        # contain many quotes.  The comma alternative keeps comma-separated
        # attributes, which the looser pattern used to accept, working.
        match = self.match(
            r"""
            \<%     # opening tag

            ([\w\.\:]+)   # keyword

            # attrname, = sign, quoted string expression, or comma
            ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*)

            \s*     # more whitespace

            (/)?>   # closing

            """,
            re.I | re.S | re.X,
        )

        if not match:
            return False

        keyword, attr, isend = match.groups()
        self.keyword = keyword
        attributes = {}
        if attr:
            for att in re.findall(
                r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr
            ):
                key, val1, val2 = att
                # only one of the two quote groups is non-empty
                text = val1 or val2
                text = text.replace("\r\n", "\n")
                attributes[key] = text
        self.append_node(parsetree.Tag, keyword, attributes)
        if isend:
            self.tag.pop()
        elif keyword == "text":
            match = self.match(r"(.*?)(?=\</%text>)", re.S)
            if not match:
                raise exceptions.SyntaxException(
                    "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                    **self.exception_kwargs
                )
            self.append_node(parsetree.Text, match.group(1))
            return self.match_tag_end()
        return True

    def match_tag_end(self):
        """Match a closing ``</%keyword>`` tag and pop the tag stack.

        :return: ``True`` if a closing tag was consumed, else ``False``.
        :raises exceptions.SyntaxException: if no tag is open, or the
         closing keyword differs from the innermost open tag.
        """
        match = self.match(r"\</%[\t ]*(.+?)[\t ]*>")
        if not match:
            return False

        if not self.tag:
            raise exceptions.SyntaxException(
                "Closing tag without opening tag: </%%%s>"
                % match.group(1),
                **self.exception_kwargs
            )
        elif self.tag[-1].keyword != match.group(1):
            raise exceptions.SyntaxException(
                "Closing tag </%%%s> does not match tag: <%%%s>"
                % (match.group(1), self.tag[-1].keyword),
                **self.exception_kwargs
            )
        self.tag.pop()
        return True

    def match_end(self):
        """Return ``True`` if the cursor is at the end of the text.

        ``\\Z`` can only ever match the empty string, so the result is a
        plain boolean.
        """
        return self.match(r"\Z", re.S) is not None

    def match_text(self):
        """Match a run of plain template text.

        The run ends (without consuming the delimiter) before a control
        or ``##`` comment line, a ``${`` expression, or a ``<%`` / ``</%``
        / ``<&`` / ``</&`` construct; it also ends at an escaped newline,
        which is consumed and discarded, or at the end of the text.  A
        ``parsetree.Text`` node is appended when the run is non-empty.

        :return: ``True`` if the pattern matched, else ``False``.
        """
        match = self.match(
            r"""
                (.*?)         # anything, followed by:
                (
                 (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
                                             # comment preceded by a
                                             # consumed newline and whitespace
                 |
                 (?=\${)      # an expression
                 |
                 (?=</?[%&])  # a substitution or block or call start or end
                              # - don't consume
                 |
                 (\\\r?\n)    # an escaped newline  - throw away
                 |
                 \Z           # end of string
                )""",
            re.X | re.S,
        )

        if not match:
            return False

        text = match.group(1)
        if text:
            self.append_node(parsetree.Text, text)
        return True

    def match_python_block(self):
        """Match a ``<% ... %>`` or ``<%! ... %>`` Python block.

        Appends a ``parsetree.Code`` node whose second argument is true
        for the ``<%!`` form.  The node position is that of the opening
        marker.

        :return: ``True`` if a block was consumed, else ``False``.
        """
        match = self.match(r"<%(!)?")
        if not match:
            return False

        # remember where the block started; parse_until_text() moves on
        line, pos = self.matched_lineno, self.matched_charpos
        text, end = self.parse_until_text(False, r"%>")
        # the trailing newline helps
        # compiler.parse() not complain about indentation
        text = adjust_whitespace(text) + "\n"
        self.append_node(
            parsetree.Code,
            text,
            match.group(1) == "!",
            lineno=line,
            pos=pos,
        )
        return True

    def match_expression(self):
        """Match a ``${expr}`` or ``${expr | filters}`` expression.

        Appends a ``parsetree.Expression`` node with the expression text
        (``\\r\\n`` normalised to ``\\n``) and the stripped filter text,
        which is empty when no ``|`` section is present.

        :return: ``True`` if an expression was consumed, else ``False``.
        """
        match = self.match(r"\${")
        if not match:
            return False

        line, pos = self.matched_lineno, self.matched_charpos
        text, end = self.parse_until_text(True, r"\|", r"}")
        if end == "|":
            escapes, end = self.parse_until_text(True, r"}")
        else:
            escapes = ""
        text = text.replace("\r\n", "\n")
        self.append_node(
            parsetree.Expression,
            text,
            escapes.strip(),
            lineno=line,
            pos=pos,
        )
        return True

    def match_control_line(self):
        """Match a ``% keyword ...`` control line or a ``## ...`` comment.

        ``%`` lines become ``parsetree.ControlLine`` nodes, ``##`` lines
        become ``parsetree.Comment`` nodes.  A line starting with ``%%``
        is not treated as a control line.

        :return: ``True`` if a line was consumed, else ``False``.
        :raises exceptions.SyntaxException: if the control line has no
         keyword, or an ``end<keyword>`` line has no matching opener.
        """
        match = self.match(
            r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\r?\n)|[^\r\n])*)"
            r"(?:\r?\n|\Z)",
            re.M,
        )
        if not match:
            return False

        operator = match.group(1)
        text = match.group(2)
        if operator == "%":
            m2 = re.match(r"(end)?(\w+)\s*(.*)", text)
            if not m2:
                raise exceptions.SyntaxException(
                    "Invalid control line: '%s'" % text,
                    **self.exception_kwargs
                )
            isend, keyword = m2.group(1, 2)
            isend = isend is not None

            if isend:
                if not self.control_line:
                    raise exceptions.SyntaxException(
                        "No starting keyword '%s' for '%s'"
                        % (keyword, text),
                        **self.exception_kwargs
                    )
                elif self.control_line[-1].keyword != keyword:
                    raise exceptions.SyntaxException(
                        "Keyword '%s' doesn't match keyword '%s'"
                        % (text, self.control_line[-1].keyword),
                        **self.exception_kwargs
                    )
            self.append_node(parsetree.ControlLine, keyword, isend, text)
        else:
            self.append_node(parsetree.Comment, text)
        return True

    def match_comment(self):
        """matches the multiline version of a comment

        i.e. ``<%doc> ... </%doc>``; the enclosed text becomes a
        ``parsetree.Comment`` node.

        :return: ``True`` if a comment was consumed, else ``False``.
        """
        match = self.match(r"<%doc>(.*?)</%doc>", re.S)
        if not match:
            return False

        self.append_node(parsetree.Comment, match.group(1))
        return True