1 files changed, 1189 insertions, 0 deletions
diff --git a/third_party/python/esprima/esprima/scanner.py b/third_party/python/esprima/esprima/scanner.py
new file mode 100644
index 0000000000..53502a51d3
--- /dev/null
+++ b/third_party/python/esprima/esprima/scanner.py
@@ -0,0 +1,1189 @@
+# -*- coding: utf-8 -*-
+# Copyright JS Foundation and other contributors, https://js.foundation/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#   * Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   * Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import absolute_import, unicode_literals
+
+import re
+
+from .objects import Object
+from .compat import xrange, unicode, uchr, uord
+from .character import Character, HEX_CONV, OCTAL_CONV
+from .messages import Messages
+from .token import Token
+
+
+def hexValue(ch):  # numeric value of hex digit `ch`, looked up in HEX_CONV
+    return HEX_CONV[ch]
+
+
+def octalValue(ch):  # numeric value of octal digit `ch`, looked up in OCTAL_CONV
+    return OCTAL_CONV[ch]
+
+
+class RegExp(Object):
+    def __init__(self, pattern=None, flags=None):
+        self.pattern = pattern
+        self.flags = flags
+
+
+class Position(Object):
+    def __init__(self, line=None, column=None, offset=None):
+        self.line = line
+        self.column = column
+        self.offset = offset
+
+
+class SourceLocation(Object):
+    def __init__(self, start=None, end=None, source=None):
+        self.start = start
+        self.end = end
+        self.source = source
+
+
+class Comment(Object):
+    def __init__(self, multiLine=None, slice=None, range=None, loc=None):
+        self.multiLine = multiLine
+        self.slice = slice
+        self.range = range
+        self.loc = loc
+
+
+class RawToken(Object):
+    def __init__(self, type=None, value=None, pattern=None, flags=None, regex=None, octal=None, cooked=None, head=None, tail=None, lineNumber=None, lineStart=None, start=None, end=None):
+        self.type = type
+        self.value = value
+        self.pattern = pattern
+        self.flags = flags
+        self.regex = regex
+        self.octal = octal
+        self.cooked = cooked
+        self.head = head
+        self.tail = tail
+        self.lineNumber = lineNumber
+        self.lineStart = lineStart
+        self.start = start
+        self.end = end
+
+
+class ScannerState(Object):
+    def __init__(self, index=None, lineNumber=None, lineStart=None):
+        self.index = index
+        self.lineNumber = lineNumber
+        self.lineStart = lineStart
+
+
+class Octal(object):
+    def __init__(self, octal, code):
+        self.octal = octal
+        self.code = code
+
+
+class Scanner(object):
+    def __init__(self, code, handler):
+        self.source = unicode(code) + '\x00'
+        self.errorHandler = handler
+        self.trackComment = False
+        self.isModule = False
+
+        self.length = len(code)
+        self.index = 0
+        self.lineNumber = 1 if self.length > 0 else 0
+        self.lineStart = 0
+        self.curlyStack = []
+
+    def saveState(self):
+        return ScannerState(
+            index=self.index,
+            lineNumber=self.lineNumber,
+            lineStart=self.lineStart
+        )
+
+    def restoreState(self, state):
+        self.index = state.index
+        self.lineNumber = state.lineNumber
+        self.lineStart = state.lineStart
+
+    def eof(self):
+        return self.index >= self.length
+
+    def throwUnexpectedToken(self, message=Messages.UnexpectedTokenIllegal):
+        return self.errorHandler.throwError(self.index, self.lineNumber,
+            self.index - self.lineStart + 1, message)
+
+    def tolerateUnexpectedToken(self, message=Messages.UnexpectedTokenIllegal):
+        self.errorHandler.tolerateError(self.index, self.lineNumber,
+            self.index - self.lineStart + 1, message)
+
+    # https://tc39.github.io/ecma262/#sec-comments
+
+    def skipSingleLineComment(self, offset):
+        comments = []
+
+        if self.trackComment:
+            start = self.index - offset
+            loc = SourceLocation(
+                start=Position(
+                    line=self.lineNumber,
+                    column=self.index - self.lineStart - offset
+                ),
+                end=Position()
+            )
+
+        while not self.eof():
+            ch = self.source[self.index]
+            self.index += 1
+            if Character.isLineTerminator(ch):
+                if self.trackComment:
+                    loc.end = Position(
+                        line=self.lineNumber,
+                        column=self.index - self.lineStart - 1
+                    )
+                    entry = Comment(
+                        multiLine=False,
+                        slice=[start + offset, self.index - 1],
+                        range=[start, self.index - 1],
+                        loc=loc
+                    )
+                    comments.append(entry)
+
+                if ch == '\r' and self.source[self.index] == '\n':
+                    self.index += 1
+
+                self.lineNumber += 1
+                self.lineStart = self.index
+                return comments
+
+        if self.trackComment:
+            loc.end = Position(
+                line=self.lineNumber,
+                column=self.index - self.lineStart
+            )
+            entry = Comment(
+                multiLine=False,
+                slice=[start + offset, self.index],
+                range=[start, self.index],
+                loc=loc
+            )
+            comments.append(entry)
+
+        return comments
+
+    def skipMultiLineComment(self):
+        comments = []
+
+        if self.trackComment:
+            comments = []
+            start = self.index - 2
+            loc = SourceLocation(
+                start=Position(
+                    line=self.lineNumber,
+                    column=self.index - self.lineStart - 2
+                ),
+                end=Position()
+            )
+
+        while not self.eof():
+            ch = self.source[self.index]
+            if Character.isLineTerminator(ch):
+                if ch == '\r' and self.source[self.index + 1] == '\n':
+                    self.index += 1
+
+                self.lineNumber += 1
+                self.index += 1
+                self.lineStart = self.index
+            elif ch == '*':
+                # Block comment ends with '*/'.
+                if self.source[self.index + 1] == '/':
+                    self.index += 2
+                    if self.trackComment:
+                        loc.end = Position(
+                            line=self.lineNumber,
+                            column=self.index - self.lineStart
+                        )
+                        entry = Comment(
+                            multiLine=True,
+                            slice=[start + 2, self.index - 2],
+                            range=[start, self.index],
+                            loc=loc
+                        )
+                        comments.append(entry)
+
+                    return comments
+
+                self.index += 1
+            else:
+                self.index += 1
+
+        # Ran off the end of the file - the whole thing is a comment
+        if self.trackComment:
+            loc.end = Position(
+                line=self.lineNumber,
+                column=self.index - self.lineStart
+            )
+            entry = Comment(
+                multiLine=True,
+                slice=[start + 2, self.index],
+                range=[start, self.index],
+                loc=loc
+            )
+            comments.append(entry)
+
+        self.tolerateUnexpectedToken()
+        return comments
+
+    def scanComments(self):
+        comments = []
+
+        start = self.index == 0
+        while not self.eof():
+            ch = self.source[self.index]
+
+            if Character.isWhiteSpace(ch):
+                self.index += 1
+            elif Character.isLineTerminator(ch):
+                self.index += 1
+                if ch == '\r' and self.source[self.index] == '\n':
+                    self.index += 1
+
+                self.lineNumber += 1
+                self.lineStart = self.index
+                start = True
+            elif ch == '/':  # U+002F is '/'
+                ch = self.source[self.index + 1]
+                if ch == '/':
+                    self.index += 2
+                    comment = self.skipSingleLineComment(2)
+                    if self.trackComment:
+                        comments.extend(comment)
+
+                    start = True
+                elif ch == '*':  # U+002A is '*'
+                    self.index += 2
+                    comment = self.skipMultiLineComment()
+                    if self.trackComment:
+                        comments.extend(comment)
+
+                else:
+                    break
+
+            elif start and ch == '-':  # U+002D is '-'
+                # U+003E is '>'
+                if self.source[self.index + 1:self.index + 3] == '->':
+                    # '-->' is a single-line comment
+                    self.index += 3
+                    comment = self.skipSingleLineComment(3)
+                    if self.trackComment:
+                        comments.extend(comment)
+
+                else:
+                    break
+
+            elif ch == '<' and not self.isModule:  # U+003C is '<'
+                if self.source[self.index + 1:self.index + 4] == '!--':
+                    self.index += 4  # `<!--`
+                    comment = self.skipSingleLineComment(4)
+                    if self.trackComment:
+                        comments.extend(comment)
+
+                else:
+                    break
+
+            else:
+                break
+
+        return comments
+
+    # https://tc39.github.io/ecma262/#sec-future-reserved-words
+
+    def isFutureReservedWord(self, id):
+        return id in self.isFutureReservedWord.set
+    isFutureReservedWord.set = set((
+        'enum',
+        'export',
+        'import',
+        'super',
+    ))
+
+    def isStrictModeReservedWord(self, id):
+        return id in self.isStrictModeReservedWord.set
+    isStrictModeReservedWord.set = set((
+        'implements',
+        'interface',
+        'package',
+        'private',
+        'protected',
+        'public',
+        'static',
+        'yield',
+        'let',
+    ))
+
+    def isRestrictedWord(self, id):
+        return id in self.isRestrictedWord.set
+    isRestrictedWord.set = set((
+        'eval', 'arguments',
+    ))
+
+    # https://tc39.github.io/ecma262/#sec-keywords
+
+    def isKeyword(self, id):
+        return id in self.isKeyword.set
+    isKeyword.set = set((
+        'if', 'in', 'do',
+
+        'var', 'for', 'new',
+        'try', 'let',
+
+        'this', 'else', 'case',
+        'void', 'with', 'enum',
+
+        'while', 'break', 'catch',
+        'throw', 'const', 'yield',
+        'class', 'super',
+
+        'return', 'typeof', 'delete',
+        'switch', 'export', 'import',
+
+        'default', 'finally', 'extends',
+
+        'function', 'continue', 'debugger',
+
+        'instanceof',
+    ))
+
+    def codePointAt(self, i):  # uord() of the (up to) two-character slice at i; presumably pairs surrogates - confirm in compat
+        return uord(self.source[i:i + 2])
+
+    def scanHexEscape(self, prefix):
+        length = 4 if prefix == 'u' else 2
+        code = 0
+
+        for i in xrange(length):
+            if not self.eof() and Character.isHexDigit(self.source[self.index]):
+                ch = self.source[self.index]
+                self.index += 1
+                code = code * 16 + hexValue(ch)
+            else:
+                return None
+
+        return uchr(code)
+
+    def scanUnicodeCodePointEscape(self):
+        ch = self.source[self.index]
+        code = 0
+
+        # At least, one hex digit is required.
+        if ch == '}':
+            self.throwUnexpectedToken()
+
+        while not self.eof():
+            ch = self.source[self.index]
+            self.index += 1
+            if not Character.isHexDigit(ch):
+                break
+
+            code = code * 16 + hexValue(ch)
+
+        if code > 0x10FFFF or ch != '}':
+            self.throwUnexpectedToken()
+
+        return Character.fromCodePoint(code)
+
+    def getIdentifier(self):
+        start = self.index
+        self.index += 1
+        while not self.eof():
+            ch = self.source[self.index]
+            if ch == '\\':
+                # Blackslash (U+005C) marks Unicode escape sequence.
+                self.index = start
+                return self.getComplexIdentifier()
+            else:
+                cp = ord(ch)
+                if cp >= 0xD800 and cp < 0xDFFF:
+                    # Need to handle surrogate pairs.
+                    self.index = start
+                    return self.getComplexIdentifier()
+
+            if Character.isIdentifierPart(ch):
+                self.index += 1
+            else:
+                break
+
+        return self.source[start:self.index]
+
+    def getComplexIdentifier(self):  # identifier scan that also decodes \u escapes
+        cp = self.codePointAt(self.index)
+        id = Character.fromCodePoint(cp)
+        self.index += len(id)
+
+        # '\u' (U+005C, U+0075) denotes an escaped character.
+        if cp == 0x5C:
+            if self.source[self.index] != 'u':
+                self.throwUnexpectedToken()
+            # Step over the 'u'; a following '{' selects the \u{...} form.
+            self.index += 1
+            if self.source[self.index] == '{':
+                self.index += 1
+                ch = self.scanUnicodeCodePointEscape()
+            else:
+                ch = self.scanHexEscape('u')
+                if not ch or ch == '\\' or not Character.isIdentifierStart(ch[0]):
+                    self.throwUnexpectedToken()
+            # The decoded character replaces the backslash held in id.
+            id = ch
+
+        while not self.eof():
+            cp = self.codePointAt(self.index)
+            ch = Character.fromCodePoint(cp)
+            if not Character.isIdentifierPart(ch):
+                break
+
+            id += ch
+            self.index += len(ch)
+
+            # '\u' (U+005C, U+0075) denotes an escaped character.
+            if cp == 0x5C:
+                id = id[:-1]  # drop the backslash appended just above
+                if self.source[self.index] != 'u':
+                    self.throwUnexpectedToken()
+                # Step over the 'u'; a following '{' selects the \u{...} form.
+                self.index += 1
+                if self.source[self.index] == '{':
+                    self.index += 1
+                    ch = self.scanUnicodeCodePointEscape()
+                else:
+                    ch = self.scanHexEscape('u')
+                    if not ch or ch == '\\' or not Character.isIdentifierPart(ch[0]):
+                        self.throwUnexpectedToken()
+                # Append the decoded character in the backslash's place.
+                id += ch
+
+        return id
+
+    def octalToDecimal(self, ch):
+        # \0 is not octal escape sequence
+        octal = ch != '0'
+        code = octalValue(ch)
+
+        if not self.eof() and Character.isOctalDigit(self.source[self.index]):
+            octal = True
+            code = code * 8 + octalValue(self.source[self.index])
+            self.index += 1
+
+            # 3 digits are only allowed when string starts
+            # with 0, 1, 2, 3
+            if ch in '0123' and not self.eof() and Character.isOctalDigit(self.source[self.index]):
+                code = code * 8 + octalValue(self.source[self.index])
+                self.index += 1
+
+        return Octal(octal, code)
+
+    # https://tc39.github.io/ecma262/#sec-names-and-keywords
+
+    def scanIdentifier(self):
+        start = self.index
+
+        # Backslash (U+005C) starts an escaped character.
+        id = self.getComplexIdentifier() if self.source[start] == '\\' else self.getIdentifier()
+
+        # There is no keyword or literal with only one character.
+        # Thus, it must be an identifier.
+        if len(id) == 1:
+            type = Token.Identifier
+        elif self.isKeyword(id):
+            type = Token.Keyword
+        elif id == 'null':
+            type = Token.NullLiteral
+        elif id == 'true' or id == 'false':
+            type = Token.BooleanLiteral
+        else:
+            type = Token.Identifier
+
+        if type is not Token.Identifier and start + len(id) != self.index:
+            restore = self.index
+            self.index = start
+            self.tolerateUnexpectedToken(Messages.InvalidEscapedReservedWord)
+            self.index = restore
+
+        return RawToken(
+            type=type,
+            value=id,
+            lineNumber=self.lineNumber,
+            lineStart=self.lineStart,
+            start=start,
+            end=self.index
+        )
+
+    # https://tc39.github.io/ecma262/#sec-punctuators
+
+    def scanPunctuator(self):
+        start = self.index
+
+        # Check for most common single-character punctuators.
+        str = self.source[self.index]
+        if str in (
+            '(',
+            '{',
+        ):
+            if str == '{':
+                self.curlyStack.append('{')
+
+            self.index += 1
+
+        elif str == '.':
+            self.index += 1
+            if self.source[self.index] == '.' and self.source[self.index + 1] == '.':
+                # Spread operator: ...
+                self.index += 2
+                str = '...'
+
+        elif str == '}':
+            self.index += 1
+            if self.curlyStack:
+                self.curlyStack.pop()
+
+        elif str in (
+            ')',
+            ';',
+            ',',
+            '[',
+            ']',
+            ':',
+            '?',
+            '~',
+        ):
+            self.index += 1
+
+        else:
+            # 4-character punctuator.
+            str = self.source[self.index:self.index + 4]
+            if str == '>>>=':
+                self.index += 4
+            else:
+
+                # 3-character punctuators.
+                str = str[:3]
+                if str in (
+                    '===', '!==', '>>>',
+                    '<<=', '>>=', '**='
+                ):
+                    self.index += 3
+                else:
+
+                    # 2-character punctuators.
+                    str = str[:2]
+                    if str in (
+                        '&&', '||', '==', '!=',
+                        '+=', '-=', '*=', '/=',
+                        '++', '--', '<<', '>>',
+                        '&=', '|=', '^=', '%=',
+                        '<=', '>=', '=>', '**',
+                    ):
+                        self.index += 2
+                    else:
+
+                        # 1-character punctuators.
+                        str = self.source[self.index]
+                        if str in '<>=!+-*%&|^/':
+                            self.index += 1
+
+        if self.index == start:
+            self.throwUnexpectedToken()
+
+        return RawToken(
+            type=Token.Punctuator,
+            value=str,
+            lineNumber=self.lineNumber,
+            lineStart=self.lineStart,
+            start=start,
+            end=self.index
+        )
+
+    # https://tc39.github.io/ecma262/#sec-literals-numeric-literals
+
+    def scanHexLiteral(self, start):
+        num = ''
+
+        while not self.eof():
+            if not Character.isHexDigit(self.source[self.index]):
+                break
+
+            num += self.source[self.index]
+            self.index += 1
+
+        if len(num) == 0:
+            self.throwUnexpectedToken()
+
+        if Character.isIdentifierStart(self.source[self.index]):
+            self.throwUnexpectedToken()
+
+        return RawToken(
+            type=Token.NumericLiteral,
+            value=int(num, 16),
+            lineNumber=self.lineNumber,
+            lineStart=self.lineStart,
+            start=start,
+            end=self.index
+        )
+
+    def scanBinaryLiteral(self, start):
+        num = ''
+
+        while not self.eof():
+            ch = self.source[self.index]
+            if ch != '0' and ch != '1':
+                break
+
+            num += self.source[self.index]
+            self.index += 1
+
+        if len(num) == 0:
+            # only 0b or 0B
+            self.throwUnexpectedToken()
+
+        if not self.eof():
+            ch = self.source[self.index]
+            if Character.isIdentifierStart(ch) or Character.isDecimalDigit(ch):
+                self.throwUnexpectedToken()
+
+        return RawToken(
+            type=Token.NumericLiteral,
+            value=int(num, 2),
+            lineNumber=self.lineNumber,
+            lineStart=self.lineStart,
+            start=start,
+            end=self.index
+        )
+
+    def scanOctalLiteral(self, prefix, start):
+        num = ''
+        octal = False
+
+        if Character.isOctalDigit(prefix[0]):
+            octal = True
+            num = '0' + self.source[self.index]
+        self.index += 1
+
+        while not self.eof():
+            if not Character.isOctalDigit(self.source[self.index]):
+                break
+
+            num += self.source[self.index]
+            self.index += 1
+
+        if not octal and len(num) == 0:
+            # only 0o or 0O
+            self.throwUnexpectedToken()
+
+        if Character.isIdentifierStart(self.source[self.index]) or Character.isDecimalDigit(self.source[self.index]):
+            self.throwUnexpectedToken()
+
+        return RawToken(
+            type=Token.NumericLiteral,
+            value=int(num, 8),
+            octal=octal,
+            lineNumber=self.lineNumber,
+            lineStart=self.lineStart,
+            start=start,
+            end=self.index
+        )
+
+    def isImplicitOctalLiteral(self):
+        # Implicit octal, unless there is a non-octal digit.
+        # (Annex B.1.1 on Numeric Literals)
+        for i in xrange(self.index + 1, self.length):
+            ch = self.source[i]
+            if ch in '89':
+                return False
+            if not Character.isOctalDigit(ch):
+                return True
+        return True
+
+    def scanNumericLiteral(self):  # decimal literals; hands off 0x/0b/0o/implicit octal
+        start = self.index
+        ch = self.source[start]
+        assert Character.isDecimalDigit(ch) or ch == '.', 'Numeric literal must start with a decimal digit or a decimal point'
+        # num collects the literal's text; it is converted once at the end.
+        num = ''
+        if ch != '.':
+            num = self.source[self.index]
+            self.index += 1
+            ch = self.source[self.index]
+
+            # Hex number starts with '0x'.
+            # Octal number starts with '0'.
+            # Octal number in ES6 starts with '0o'.
+            # Binary number in ES6 starts with '0b'.
+            if num == '0':
+                if ch in ('x', 'X'):
+                    self.index += 1
+                    return self.scanHexLiteral(start)
+
+                if ch in ('b', 'B'):
+                    self.index += 1
+                    return self.scanBinaryLiteral(start)
+                # 'o'/'O' is not skipped here: scanOctalLiteral consumes it.
+                if ch in ('o', 'O'):
+                    return self.scanOctalLiteral(ch, start)
+                # A digit run containing 8 or 9 falls through to decimal.
+                if ch and Character.isOctalDigit(ch):
+                    if self.isImplicitOctalLiteral():
+                        return self.scanOctalLiteral(ch, start)
+            # Integer part.
+            while Character.isDecimalDigit(self.source[self.index]):
+                num += self.source[self.index]
+                self.index += 1
+
+            ch = self.source[self.index]
+        # Optional fraction.
+        if ch == '.':
+            num += self.source[self.index]
+            self.index += 1
+            while Character.isDecimalDigit(self.source[self.index]):
+                num += self.source[self.index]
+                self.index += 1
+
+            ch = self.source[self.index]
+        # Optional exponent: e/E, an optional sign, then at least one digit.
+        if ch in ('e', 'E'):
+            num += self.source[self.index]
+            self.index += 1
+
+            ch = self.source[self.index]
+            if ch in ('+', '-'):
+                num += self.source[self.index]
+                self.index += 1
+
+            if Character.isDecimalDigit(self.source[self.index]):
+                while Character.isDecimalDigit(self.source[self.index]):
+                    num += self.source[self.index]
+                    self.index += 1
+
+            else:
+                self.throwUnexpectedToken()
+        # The literal must not run straight into an identifier.
+        if Character.isIdentifierStart(self.source[self.index]):
+            self.throwUnexpectedToken()
+        # Whole-valued results are returned as int, all others as float.
+        value = float(num)
+        return RawToken(
+            type=Token.NumericLiteral,
+            value=int(value) if value.is_integer() else value,
+            lineNumber=self.lineNumber,
+            lineStart=self.lineStart,
+            start=start,
+            end=self.index
+        )
+
+    # https://tc39.github.io/ecma262/#sec-literals-string-literals
+
+    def scanStringLiteral(self):  # scans a quoted string and decodes its escapes
+        start = self.index
+        quote = self.source[start]
+        assert quote in ('\'', '"'), 'String literal must starts with a quote'
+        # Step past the opening quote.
+        self.index += 1
+        octal = False
+        str = ''
+
+        while not self.eof():
+            ch = self.source[self.index]
+            self.index += 1
+
+            if ch == quote:
+                quote = ''  # cleared to record that the closing quote was seen
+                break
+            elif ch == '\\':
+                ch = self.source[self.index]
+                self.index += 1
+                if not ch or not Character.isLineTerminator(ch):
+                    if ch == 'u':
+                        if self.source[self.index] == '{':
+                            self.index += 1
+                            str += self.scanUnicodeCodePointEscape()
+                        else:
+                            unescapedChar = self.scanHexEscape(ch)
+                            if not unescapedChar:
+                                self.throwUnexpectedToken()
+
+                            str += unescapedChar
+
+                    elif ch == 'x':
+                        unescaped = self.scanHexEscape(ch)
+                        if not unescaped:
+                            self.throwUnexpectedToken(Messages.InvalidHexEscapeSequence)
+
+                        str += unescaped
+                    elif ch == 'n':
+                        str += '\n'
+                    elif ch == 'r':
+                        str += '\r'
+                    elif ch == 't':
+                        str += '\t'
+                    elif ch == 'b':
+                        str += '\b'
+                    elif ch == 'f':
+                        str += '\f'
+                    elif ch == 'v':
+                        str += '\x0B'
+                    elif ch in (
+                        '8',
+                        '9',
+                    ):
+                        str += ch  # \8 and \9 keep the digit but are reported
+                        self.tolerateUnexpectedToken()
+
+                    else:
+                        if ch and Character.isOctalDigit(ch):
+                            octToDec = self.octalToDecimal(ch)
+                            # Remember whether any escape so far was octal.
+                            octal = octToDec.octal or octal
+                            str += uchr(octToDec.code)
+                        else:
+                            str += ch  # any other escaped character stands for itself
+
+                else:
+                    self.lineNumber += 1  # backslash + line break: nothing is appended
+                    if ch == '\r' and self.source[self.index] == '\n':
+                        self.index += 1
+
+                    self.lineStart = self.index
+
+            elif Character.isLineTerminator(ch):
+                break  # unescaped line break: quote stays set, so it is reported below
+            else:
+                str += ch
+        # quote is only cleared when the closing quote was found.
+        if quote != '':
+            self.index = start
+            self.throwUnexpectedToken()
+
+        return RawToken(
+            type=Token.StringLiteral,
+            value=str,
+            octal=octal,
+            lineNumber=self.lineNumber,
+            lineStart=self.lineStart,
+            start=start,
+            end=self.index
+        )
+
+    # https://tc39.github.io/ecma262/#sec-template-literal-lexical-components
+
+    def scanTemplate(self):  # scans one template chunk, up to a backtick or '${'
+        cooked = ''
+        terminated = False
+        start = self.index
+        # head: this chunk begins at the opening backtick.
+        head = self.source[start] == '`'
+        tail = False
+        rawOffset = 2
+        # rawOffset: closing characters cut from the raw value (2 = '${', 1 = backtick).
+        self.index += 1
+
+        while not self.eof():
+            ch = self.source[self.index]
+            self.index += 1
+            if ch == '`':
+                rawOffset = 1
+                tail = True
+                terminated = True
+                break
+            elif ch == '$':
+                if self.source[self.index] == '{':
+                    self.curlyStack.append('${')
+                    self.index += 1
+                    terminated = True
+                    break
+                # A '$' not followed by '{' is ordinary text.
+                cooked += ch
+            elif ch == '\\':
+                ch = self.source[self.index]
+                self.index += 1
+                if not Character.isLineTerminator(ch):
+                    if ch == 'n':
+                        cooked += '\n'
+                    elif ch == 'r':
+                        cooked += '\r'
+                    elif ch == 't':
+                        cooked += '\t'
+                    elif ch == 'u':
+                        if self.source[self.index] == '{':
+                            self.index += 1
+                            cooked += self.scanUnicodeCodePointEscape()
+                        else:
+                            restore = self.index
+                            unescapedChar = self.scanHexEscape(ch)
+                            if unescapedChar:
+                                cooked += unescapedChar
+                            else:
+                                self.index = restore  # bad \uXXXX: rewind and keep the 'u'
+                                cooked += ch
+
+                    elif ch == 'x':
+                        unescaped = self.scanHexEscape(ch)
+                        if not unescaped:
+                            self.throwUnexpectedToken(Messages.InvalidHexEscapeSequence)
+
+                        cooked += unescaped
+                    elif ch == 'b':
+                        cooked += '\b'
+                    elif ch == 'f':
+                        cooked += '\f'
+                    elif ch == 'v':
+                        cooked += '\v'
+
+                    else:
+                        if ch == '0':
+                            if Character.isDecimalDigit(self.source[self.index]):
+                                # Illegal: \01 \02 and so on
+                                self.throwUnexpectedToken(Messages.TemplateOctalLiteral)
+
+                            cooked += '\0'
+                        elif Character.isOctalDigit(ch):
+                            # Illegal: \1 \2
+                            self.throwUnexpectedToken(Messages.TemplateOctalLiteral)
+                        else:
+                            cooked += ch
+
+                else:
+                    self.lineNumber += 1  # backslash + line break: nothing is added to cooked
+                    if ch == '\r' and self.source[self.index] == '\n':
+                        self.index += 1
+
+                    self.lineStart = self.index
+
+            elif Character.isLineTerminator(ch):
+                self.lineNumber += 1
+                if ch == '\r' and self.source[self.index] == '\n':
+                    self.index += 1
+
+                self.lineStart = self.index
+                cooked += '\n'  # every raw line break is cooked as '\n'
+            else:
+                cooked += ch
+        # Input ended before a closing backtick or '${'.
+        if not terminated:
+            self.throwUnexpectedToken()
+        # A chunk that does not begin at a backtick drops one curlyStack entry.
+        if not head:
+            if self.curlyStack:
+                self.curlyStack.pop()
+
+        return RawToken(
+            type=Token.Template,
+            value=self.source[start + 1:self.index - rawOffset],
+            cooked=cooked,
+            head=head,
+            tail=tail,
+            lineNumber=self.lineNumber,
+            lineStart=self.lineStart,
+            start=start,
+            end=self.index
+        )
+
+    # https://tc39.github.io/ecma262/#sec-literals-regular-expression-literals
+
+    def testRegExp(self, pattern, flags):
+        # The BMP character to use as a replacement for astral symbols when
+        # translating an ES6 "u"-flagged pattern to an ES5-compatible
+        # approximation.
+        # Note: replacing with '\uFFFF' enables false positives in unlikely
+        # scenarios. For example, `[\u{1044f}-\u{10440}]` is an invalid
+        # pattern that would not be detected by this substitution.
+        astralSubstitute = '\uFFFF'
+
+        # Replace every Unicode escape sequence with the equivalent
+        # BMP character or a constant ASCII code point in the case of
+        # astral symbols. (See the above note on `astralSubstitute`
+        # for more information.)
+        def astralSub(m):
+            codePoint = int(m.group(1) or m.group(2), 16)
+            if codePoint > 0x10FFFF:
+                self.tolerateUnexpectedToken(Messages.InvalidRegExp)
+            elif codePoint <= 0xFFFF:
+                return uchr(codePoint)
+            return astralSubstitute
+        pattern = re.sub(r'\\u\{([0-9a-fA-F]+)\}|\\u([a-fA-F0-9]{4})', astralSub, pattern)
+
+        # Replace each paired surrogate with a single ASCII symbol to
+        # avoid throwing on regular expressions that are only valid in
+        # combination with the "u" flag.
+        pattern = re.sub(r'[\uD800-\uDBFF][\uDC00-\uDFFF]', astralSubstitute, pattern)
+
+        # Return a regular expression object for this pattern-flag pair, or
+        # `null` in case the current environment doesn't support the flags it
+        # uses.
+        pyflags = 0 | re.M if 'm' in flags else 0 | re.I if 'i' in flags else 0
+        try:
+            return re.compile(pattern, pyflags)
+        except Exception:
+            self.tolerateUnexpectedToken(Messages.InvalidRegExp)
+
+    def scanRegExpBody(self):
+        ch = self.source[self.index]
+        assert ch == '/', 'Regular expression literal must start with a slash'
+
+        str = self.source[self.index]
+        self.index += 1
+        classMarker = False
+        terminated = False
+
+        while not self.eof():
+            ch = self.source[self.index]
+            self.index += 1
+            str += ch
+            if ch == '\\':
+                ch = self.source[self.index]
+                self.index += 1
+                # https://tc39.github.io/ecma262/#sec-literals-regular-expression-literals
+                if Character.isLineTerminator(ch):
+                    self.throwUnexpectedToken(Messages.UnterminatedRegExp)
+
+                str += ch
+            elif Character.isLineTerminator(ch):
+                self.throwUnexpectedToken(Messages.UnterminatedRegExp)
+            elif classMarker:
+                if ch == ']':
+                    classMarker = False
+
+            else:
+                if ch == '/':
+                    terminated = True
+                    break
+                elif ch == '[':
+                    classMarker = True
+
+        if not terminated:
+            self.throwUnexpectedToken(Messages.UnterminatedRegExp)
+
+        # Exclude leading and trailing slash.
+        return str[1:-1]
+
+    def scanRegExpFlags(self):
+        str = ''
+        flags = ''
+        while not self.eof():
+            ch = self.source[self.index]
+            if not Character.isIdentifierPart(ch):
+                break
+
+            self.index += 1
+            if ch == '\\' and not self.eof():
+                ch = self.source[self.index]
+                if ch == 'u':
+                    self.index += 1
+                    restore = self.index
+                    char = self.scanHexEscape('u')
+                    if char:
+                        flags += char
+                        str += '\\u'
+                        while restore < self.index:
+                            str += self.source[restore]
+                            restore += 1
+
+                    else:
+                        self.index = restore
+                        flags += 'u'
+                        str += '\\u'
+
+                    self.tolerateUnexpectedToken()
+                else:
+                    str += '\\'
+                    self.tolerateUnexpectedToken()
+
+            else:
+                flags += ch
+                str += ch
+
+        return flags
+
+    def scanRegExp(self):
+        start = self.index
+
+        pattern = self.scanRegExpBody()
+        flags = self.scanRegExpFlags()
+        value = self.testRegExp(pattern, flags)
+
+        return RawToken(
+            type=Token.RegularExpression,
+            value='',
+            pattern=pattern,
+            flags=flags,
+            regex=value,
+            lineNumber=self.lineNumber,
+            lineStart=self.lineStart,
+            start=start,
+            end=self.index
+        )
+
+    def lex(self):
+        if self.eof():
+            return RawToken(
+                type=Token.EOF,
+                value='',
+                lineNumber=self.lineNumber,
+                lineStart=self.lineStart,
+                start=self.index,
+                end=self.index
+            )
+
+        ch = self.source[self.index]
+
+        if Character.isIdentifierStart(ch):
+            return self.scanIdentifier()
+
+        # Very common: ( and ) and ;
+        if ch in ('(', ')', ';'):
+            return self.scanPunctuator()
+
+        # String literal starts with single quote (U+0027) or double quote (U+0022).
+        if ch in ('\'', '"'):
+            return self.scanStringLiteral()
+
+        # Dot (.) U+002E can also start a floating-point number, hence the need
+        # to check the next character.
+        if ch == '.':
+            if Character.isDecimalDigit(self.source[self.index + 1]):
+                return self.scanNumericLiteral()
+
+            return self.scanPunctuator()
+
+        if Character.isDecimalDigit(ch):
+            return self.scanNumericLiteral()
+
+        # Template literals start with ` (U+0060) for template head
+        # or } (U+007D) for template middle or template tail.
+        if ch == '`' or (ch == '}' and self.curlyStack and self.curlyStack[-1] == '${'):
+            return self.scanTemplate()
+
+        # Possible identifier start in a surrogate pair.
+        cp = ord(ch)
+        if cp >= 0xD800 and cp < 0xDFFF:
+            cp = self.codePointAt(self.index)
+            ch = Character.fromCodePoint(cp)
+            if Character.isIdentifierStart(ch):
+                return self.scanIdentifier()
+
+        return self.scanPunctuator()