#!/usr/bin/env python

"""parse_pgen.py - Parse grammars written in the pgen parser specification language.

I'm not sure I want to keep this pgen mini-language around; ignore this for now.
"""

import sys
from collections import namedtuple

from .lexer import LexicalGrammar
from .grammar import Grammar, Production, CallMethod, is_concrete_element, Optional
from . import gen
from . import parse_pgen_generated


# Lexical grammar for pgen itself: a handful of keywords and punctuators,
# with whitespace and `#` comments ignored, plus a few variable-width tokens.
pgen_lexer = LexicalGrammar(
    "goal nt var token { } ; ? = => ( ) ,",
    r'([ \t\r\n]|#.*)*',
    IDENT=r'[A-Za-z_](?:\w|[_-])*',
    STR=r'"[^\\\n"]*"',
    MATCH=r'\$(?:0|[1-9][0-9]*)',
    COMMENT=r'//.*',
)


def list_of(e, allow_comments=False):
    """Return productions for a left-recursive list of `e` elements."""
    nt = e + 's'
    prods = [
        Production([e], CallMethod('single', (0,))),
        Production([nt, e], CallMethod('append', (0, 1))),
    ]
    if allow_comments:
        prods.append(Production(['COMMENT'], CallMethod('empty', (0,))))
    return prods


def call_method(name, body):
    """Build a CallMethod reducer that passes every non-discarded concrete
    element of `body` to the builder method `name`."""
    arg_indexes = []
    current = 0
    for e in body:
        if is_concrete_element(e):
            if e not in discards:
                arg_indexes.append(current)
            current += 1
    return CallMethod(name, tuple(arg_indexes))


def prod(body, reducer):
    if isinstance(reducer, str):
        reducer = call_method(reducer, body)
    return Production(body, reducer)


# Keywords and punctuators that carry no information; they are never passed
# to builder methods.
discards = set('token var nt goal Some None = => ; ( ) { } , ?'.split())

# The grammar of the pgen language, expressed using jsparagus itself.
pgen_grammar = Grammar(
    {
        'grammar': [[Optional('token_defs'), 'nt_defs']],
        'token_defs': list_of('token_def'),
        'token_def': [
            prod(['token', 'IDENT', '=', 'STR', ';'], 'const_token'),
            prod(['var', 'token', 'IDENT', ';'], 'var_token'),
        ],
        'nt_defs': [
            prod(['nt_def'], 'nt_defs_single'),
            prod(['nt_defs', 'nt_def'], 'nt_defs_append'),
        ],
        'nt_def': [
            prod([Optional('COMMENT'), Optional('goal'), 'nt', 'IDENT',
                  '{', Optional('prods'), '}'], 'nt_def'),
        ],
        'prods': list_of('prod', allow_comments=True),
        'prod': [
            prod(['terms', Optional('reducer'), ';'], 'prod'),
        ],
        'terms': list_of('term'),
        'term': [
            ['symbol'],
            prod(['symbol', '?'], 'optional'),
        ],
        'symbol': [
            prod(['IDENT'], 'ident'),
            prod(['STR'], 'str'),
        ],
        'reducer': [
            prod(['=>', 'expr'], 1),
        ],
        'expr': [
            prod(['MATCH'], 'expr_match'),
            prod(['IDENT', '(', Optional('expr_args'), ')'], 'expr_call'),
            prod(['Some', '(', 'expr', ')'], 'expr_some'),
            prod(['None'], 'expr_none'),
        ],
        'expr_args': [
            prod(['expr'], 'args_single'),
            prod(['expr_args', ',', 'expr'], 'args_append'),
        ],
    },
    goal_nts=['grammar'],
    variable_terminals=['IDENT', 'STR', 'MATCH', 'COMMENT'],
)


Literal = namedtuple("Literal", "chars")

# Token set used when a pgen file declares no tokens of its own.
default_token_list = [
    ("Var", "var"),
    ("Token", "token"),
    ("Goal", "goal"),
    ("Nt", "nt"),
    ("IDENT", None),
    ("STR", None),
    ("OpenBrace", "{"),
    ("CloseBrace", "}"),
    ("OpenParenthesis", "("),
    ("CloseParenthesis", ")"),
    ("Colon", ":"),
    ("EqualSign", "="),
    ("Asterisk", "*"),
    ("PlusSign", "+"),
    ("MinusSign", "-"),
    ("Slash", "/"),
    ("Semicolon", ";"),
    ("QuestionMark", "?"),
    ("RightArrow", "->"),
    ("Comma", ","),
]


class AstBuilder:
    """Builder methods invoked by the generated pgen parser; each corresponds
    to a reducer name in pgen_grammar above."""

    def grammar(self, token_defs, nt_defs):
        nonterminals, goal_nts = nt_defs
        return (token_defs or default_token_list, nonterminals, goal_nts)

    def empty(self, value):
        return []

    def single(self, value):
        return [value]

    def append(self, values, value):
        values.append(value)
        return values

    def const_token(self, name, picture):
        assert picture[0] == '"'
        assert picture[-1] == '"'
        return (name, picture[1:-1])

    def var_token(self, name):
        return (name, None)

    def comment(self, comment):
        pass

    def nt_defs_single(self, nt_def):
        return self.nt_defs_append(({}, []), nt_def)

    def nt_defs_append(self, grammar_in, nt_def):
        is_goal, nt, prods = nt_def
        grammar, goal_nts = grammar_in
        if nt in grammar:
            raise ValueError("multiple definitions for nt {}".format(nt))
        grammar[nt] = prods
        if is_goal:
            goal_nts.append(nt)
        return grammar, goal_nts

    def nt_def(self, _comment, goal_kw, ident, prods):
        is_goal = goal_kw == "goal"
        # `prods` is None when the braces are empty, since the grammar marks
        # it Optional('prods').
        prods = [Production(body, reducer) for body, reducer in (prods or [])]
        return (is_goal, ident, prods)

    def prod(self, symbols, reducer):
        if reducer is None:
            if sum(1 for e in symbols if is_concrete_element(e)) == 1:
                reducer = 0
            else:
                raise ValueError("reducer required for {!r}".format(symbols))
        return (symbols, reducer)

    def optional(self, sym):
        return Optional(sym)

    def ident(self, sym):
        return sym

    def str(self, sym):
        assert len(sym) > 1
        assert sym[0] == '"'
        assert sym[-1] == '"'
        chars = sym[1:-1]  # This is a bit sloppy.
        return Literal(chars)

    def expr_match(self, match):
        assert match.startswith('$')
        return int(match[1:])

    def expr_call(self, ident, args):
        return CallMethod(ident, tuple(args or ()))

    def args_single(self, expr):
        return [expr]

    def args_append(self, args, arg):
        args.append(arg)
        return args


def check_grammar(result):
    """Validate the parsed grammar and replace each Literal with the name of
    the token it matches, so the result refers only to declared symbols."""
    tokens, nonterminals, goal_nts = result
    tokens_by_name = {}
    tokens_by_image = {}
    for name, image in tokens:
        if name in tokens_by_name:
            raise ValueError("token `{}` redeclared".format(name))
        tokens_by_name[name] = image
        if image is not None and image in tokens_by_image:
            raise ValueError('multiple tokens look like "{}"'.format(image))
        tokens_by_image[image] = name
        if name in nonterminals:
            raise ValueError(
                "`{}` is declared as both a token and a nonterminal (pick one)"
                .format(name))

    def check_element(nt, i, e):
        if isinstance(e, Optional):
            return Optional(check_element(nt, i, e.inner))
        elif isinstance(e, Literal):
            if e.chars not in tokens_by_image:
                raise ValueError('in {} production {}: undeclared token "{}"'
                                 .format(nt, i, e.chars))
            return e.chars
        else:
            assert isinstance(e, str), e.__class__.__name__
            if e in nonterminals:
                return e
            elif e in tokens_by_name:
                image = tokens_by_name[e]
                if image is not None:
                    return image
                return e
            else:
                raise ValueError("in {} production {}: undeclared symbol {}"
                                 .format(nt, i, e))

    out = {nt: [] for nt in nonterminals}
    for nt, rhs_list in nonterminals.items():
        for i, p in enumerate(rhs_list):
            out_rhs = [check_element(nt, i, e) for e in p.body]
            out[nt].append(p.copy_with(body=out_rhs))

    return (tokens, out, goal_nts)


def load_grammar(filename):
    """Parse a .pgen file and return the Grammar it describes."""
    with open(filename) as f:
        text = f.read()
    parser = parse_pgen_generated.Parser(builder=AstBuilder())
    lexer = pgen_lexer(parser, filename=filename)
    lexer.write(text)
    result = lexer.close()
    tokens, nonterminals, goals = check_grammar(result)
    variable_terminals = [name for name, image in tokens if image is None]
    return Grammar(nonterminals, goal_nts=goals,
                   variable_terminals=variable_terminals)


def regenerate():
    """Regenerate parse_pgen_generated.py from pgen_grammar."""
    gen.generate_parser(sys.stdout, pgen_grammar)


if __name__ == '__main__':
    if sys.argv[1:] == ['--regenerate']:
        regenerate()
    else:
        print("usage: python -m jsparagus.parse_pgen --regenerate")
        sys.exit(1)