Diffstat (limited to 'third_party/python/esprima/esprima/tokenizer.py')
-rw-r--r-- | third_party/python/esprima/esprima/tokenizer.py | 193
1 file changed, 193 insertions, 0 deletions
diff --git a/third_party/python/esprima/esprima/tokenizer.py b/third_party/python/esprima/esprima/tokenizer.py
new file mode 100644
index 0000000000..288193965d
--- /dev/null
+++ b/third_party/python/esprima/esprima/tokenizer.py
@@ -0,0 +1,193 @@
+# -*- coding: utf-8 -*-
+# Copyright JS Foundation and other contributors, https://js.foundation/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#   * Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   * Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import absolute_import, unicode_literals
+
+from collections import deque
+
+from .objects import Object
+from .error_handler import ErrorHandler
+from .scanner import Scanner, SourceLocation, Position, RegExp
+from .token import Token, TokenName
+
+
+class BufferEntry(Object):
+    def __init__(self, type, value, regex=None, range=None, loc=None):
+        self.type = type
+        self.value = value
+        self.regex = regex
+        self.range = range
+        self.loc = loc
+
+
+class Reader(object):
+    def __init__(self):
+        self.values = []
+        self.curly = self.paren = -1
+
+    # A function following one of those tokens is an expression.
+    def beforeFunctionExpression(self, t):
+        return t in (
+            '(', '{', '[', 'in', 'typeof', 'instanceof', 'new',
+            'return', 'case', 'delete', 'throw', 'void',
+            # assignment operators
+            '=', '+=', '-=', '*=', '**=', '/=', '%=', '<<=', '>>=', '>>>=',
+            '&=', '|=', '^=', ',',
+            # binary/unary operators
+            '+', '-', '*', '**', '/', '%', '++', '--', '<<', '>>', '>>>', '&',
+            '|', '^', '!', '~', '&&', '||', '?', ':', '===', '==', '>=',
+            '<=', '<', '>', '!=', '!=='
+        )
+
+    # Determine if forward slash (/) is an operator or part of a regular expression
+    # https://github.com/mozilla/sweet.js/wiki/design
+    def isRegexStart(self):
+        if not self.values:
+            return True
+
+        previous = self.values[-1]
+        regex = previous is not None
+
+        if previous in (
+            'this',
+            ']',
+        ):
+            regex = False
+        elif previous == ')':
+            keyword = self.values[self.paren - 1]
+            regex = keyword in ('if', 'while', 'for', 'with')
+
+        elif previous == '}':
+            # Dividing a function by anything makes little sense,
+            # but we have to check for that.
+            regex = True
+            if len(self.values) >= 3 and self.values[self.curly - 3] == 'function':
+                # Anonymous function, e.g. function(){} /42
+                check = self.values[self.curly - 4]
+                regex = not self.beforeFunctionExpression(check) if check else False
+            elif len(self.values) >= 4 and self.values[self.curly - 4] == 'function':
+                # Named function, e.g. function f(){} /42/
+                check = self.values[self.curly - 5]
+                regex = not self.beforeFunctionExpression(check) if check else True
+
+        return regex
+
+    def append(self, token):
+        if token.type in (Token.Punctuator, Token.Keyword):
+            if token.value == '{':
+                self.curly = len(self.values)
+            elif token.value == '(':
+                self.paren = len(self.values)
+            self.values.append(token.value)
+        else:
+            self.values.append(None)
+
+
+class Config(Object):
+    def __init__(self, tolerant=None, comment=None, range=None, loc=None, **options):
+        self.tolerant = tolerant
+        self.comment = comment
+        self.range = range
+        self.loc = loc
+        for k, v in options.items():
+            setattr(self, k, v)
+
+
+class Tokenizer(object):
+    def __init__(self, code, options):
+        self.config = Config(**options)
+
+        self.errorHandler = ErrorHandler()
+        self.errorHandler.tolerant = self.config.tolerant
+        self.scanner = Scanner(code, self.errorHandler)
+        self.scanner.trackComment = self.config.comment
+
+        self.trackRange = self.config.range
+        self.trackLoc = self.config.loc
+        self.buffer = deque()
+        self.reader = Reader()
+
+    def errors(self):
+        return self.errorHandler.errors
+
+    def getNextToken(self):
+        if not self.buffer:
+
+            comments = self.scanner.scanComments()
+            if self.scanner.trackComment:
+                for e in comments:
+                    value = self.scanner.source[e.slice[0]:e.slice[1]]
+                    comment = BufferEntry(
+                        type='BlockComment' if e.multiLine else 'LineComment',
+                        value=value
+                    )
+                    if self.trackRange:
+                        comment.range = e.range
+                    if self.trackLoc:
+                        comment.loc = e.loc
+                    self.buffer.append(comment)
+
+            if not self.scanner.eof():
+                if self.trackLoc:
+                    loc = SourceLocation(
+                        start=Position(
+                            line=self.scanner.lineNumber,
+                            column=self.scanner.index - self.scanner.lineStart
+                        ),
+                        end=Position(),
+                    )
+
+                maybeRegex = self.scanner.source[self.scanner.index] == '/' and self.reader.isRegexStart()
+                if maybeRegex:
+                    state = self.scanner.saveState()
+                    try:
+                        token = self.scanner.scanRegExp()
+                    except Exception:
+                        self.scanner.restoreState(state)
+                        token = self.scanner.lex()
+                else:
+                    token = self.scanner.lex()
+
+                self.reader.append(token)
+
+                entry = BufferEntry(
+                    type=TokenName[token.type],
+                    value=self.scanner.source[token.start:token.end]
+                )
+                if self.trackRange:
+                    entry.range = [token.start, token.end]
+                if self.trackLoc:
+                    loc.end = Position(
+                        line=self.scanner.lineNumber,
+                        column=self.scanner.index - self.scanner.lineStart
+                    )
+                    entry.loc = loc
+                if token.type is Token.RegularExpression:
+                    entry.regex = RegExp(
+                        pattern=token.pattern,
+                        flags=token.flags,
+                    )
+
+                self.buffer.append(entry)
+
+        return self.buffer.popleft() if self.buffer else None
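
For context, a minimal sketch of driving the Tokenizer added above: construct it with source text and an options dict (the keys mirror the Config class in this diff), then call getNextToken() until it returns None. The sample source string and printed fields are illustrative only, and the sketch assumes the esprima package from this tree is on the Python import path.

    from esprima.tokenizer import Tokenizer

    # Illustrative JS source: the first slash follows an if(...) condition,
    # so Reader.isRegexStart() treats it as the start of a regex literal;
    # the second slash follows an identifier and is lexed as division.
    source = "if (done) /ab+c/.test(s); else total / count;"

    tokenizer = Tokenizer(source, {
        'range': True,     # attach [start, end] offsets to each entry
        'loc': False,
        'comment': False,
        'tolerant': True,  # collect errors via errors() instead of raising
    })

    token = tokenizer.getNextToken()
    while token is not None:
        print(token.type, repr(token.value), token.range)
        token = tokenizer.getNextToken()

With this input the Reader heuristic looks back through its value stack: the token before the first slash is ')', and the word before the matching '(' is 'if', so scanRegExp() is attempted and /ab+c/ comes back as a single RegularExpression entry, while the slash in total / count is emitted as a Punctuator.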