summaryrefslogtreecommitdiffstats
path: root/third_party/python/json-e/jsone/prattparser.py
blob: 5bf250a8161aabaeabd45289a779ecb9351d8a45 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
from __future__ import absolute_import, print_function, unicode_literals

import re
from collections import namedtuple
from .shared import TemplateError
from .six import with_metaclass, viewitems


class SyntaxError(TemplateError):

    @classmethod
    def unexpected(cls, got, exp):
        exp = ', '.join(sorted(exp))
        return cls('Found {}, expected {}'.format(got.value, exp))


Token = namedtuple('Token', ['kind', 'value', 'start', 'end'])


def prefix(*kinds):
    """Decorate a method as handling prefix tokens of the given kinds"""
    def wrap(fn):
        try:
            fn.prefix_kinds.extend(kinds)
        except AttributeError:
            fn.prefix_kinds = list(kinds)
        return fn
    return wrap


def infix(*kinds):
    """Decorate a method as handling infix tokens of the given kinds"""
    def wrap(fn):
        try:
            fn.infix_kinds.extend(kinds)
        except AttributeError:
            fn.infix_kinds = list(kinds)
        return fn
    return wrap


class PrattParserMeta(type):

    def __init__(cls, name, bases, body):
        # set up rules based on decorated methods
        infix_rules = cls.infix_rules = {}
        prefix_rules = cls.prefix_rules = {}
        for prop, value in viewitems(body):
            if hasattr(value, 'prefix_kinds'):
                for kind in value.prefix_kinds:
                    prefix_rules[kind] = value
                delattr(cls, prop)
            if hasattr(value, 'infix_kinds'):
                for kind in value.infix_kinds:
                    infix_rules[kind] = value
                delattr(cls, prop)

        # build a regular expression to generate a sequence of tokens
        token_patterns = [
            '({})'.format(cls.patterns.get(t, re.escape(t)))
            for t in cls.tokens]
        if cls.ignore:
            token_patterns.append('(?:{})'.format(cls.ignore))
        cls.token_re = re.compile('^(?:' + '|'.join(token_patterns) + ')')

        # build a map from token kind to precedence level
        cls.precedence_map = {
            kind: prec + 1
            for (prec, row) in enumerate(cls.precedence)
            for kind in row
        }


class PrattParser(with_metaclass(PrattParserMeta, object)):

    # regular expression for ignored input (e.g., whitespace)
    ignore = None

    # regular expressions for tokens that do not match themselves
    patterns = {}

    # all token kinds (note that order matters - the first matching token
    # will be returned)
    tokens = []

    # precedence of tokens, as a list of lists, from lowest to highest
    precedence = []

    def parse(self, source):
        pc = ParseContext(self, source, self._generate_tokens(source))
        result = pc.parse()
        # if there are any tokens remaining, that's an error..
        token = pc.attempt()
        if token:
            raise SyntaxError.unexpected(token, self.infix_rules)
        return result

    def parseUntilTerminator(self, source, terminator):
        pc = ParseContext(self, source, self._generate_tokens(source))
        result = pc.parse()
        token = pc.attempt()
        if token.kind != terminator:
            raise SyntaxError.unexpected(token, [terminator])
        return (result, token.start)

    def _generate_tokens(self, source):
        offset = 0
        while True:
            start = offset
            remainder = source[offset:]
            mo = self.token_re.match(remainder)
            if not mo:
                if remainder:
                    raise SyntaxError(
                        "Unexpected input: '{}'".format(remainder))
                break
            offset += mo.end()

            # figure out which token matched (note that idx is 0-based)
            indexes = list(
                filter(lambda x: x[1] is not None, enumerate(mo.groups())))
            if indexes:
                idx = indexes[0][0]
                yield Token(
                    kind=self.tokens[idx],
                    value=mo.group(idx + 1),  # (mo.group is 1-based)
                    start=start,
                    end=offset)


class ParseContext(object):

    def __init__(self, parser, source, token_generator):
        self.parser = parser
        self.source = source

        self._tokens = token_generator
        self._error = None

        self._advance()

    def _advance(self):
        try:
            self.next_token = next(self._tokens)
        except StopIteration:
            self.next_token = None
        except SyntaxError as exc:
            self._error = exc

    def attempt(self, *kinds):
        """Try to get the next token if it matches one of the kinds given,
        otherwise returning None. If no kinds are given, any kind is
        accepted."""
        if self._error:
            raise self._error
        token = self.next_token
        if not token:
            return None
        if kinds and token.kind not in kinds:
            return None
        self._advance()
        return token

    def require(self, *kinds):
        """Get the next token, raising an exception if it doesn't match one of
        the given kinds, or the input ends. If no kinds are given, returns the
        next token of any kind."""
        token = self.attempt()
        if not token:
            raise SyntaxError('Unexpected end of input')
        if kinds and token.kind not in kinds:
            raise SyntaxError.unexpected(token, kinds)
        return token

    def parse(self, precedence=None):
        parser = self.parser
        precedence = parser.precedence_map[precedence] if precedence else 0
        token = self.require()
        prefix_rule = parser.prefix_rules.get(token.kind)
        if not prefix_rule:
            raise SyntaxError.unexpected(token, parser.prefix_rules)
        left = prefix_rule(parser, token, self)
        while self.next_token:
            kind = self.next_token.kind
            if kind not in parser.infix_rules:
                break
            if precedence >= parser.precedence_map[kind]:
                break
            token = self.require()
            left = parser.infix_rules[kind](parser, left, token, self)
        return left