// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package ebnf import ( "io" "strconv" "text/scanner" ) type parser struct { errors errorList scanner scanner.Scanner pos scanner.Position // token position tok rune // one token look-ahead lit string // token literal } func (p *parser) next() { p.tok = p.scanner.Scan() p.pos = p.scanner.Position p.lit = p.scanner.TokenText() } func (p *parser) error(pos scanner.Position, msg string) { p.errors = append(p.errors, newError(pos, msg)) } func (p *parser) errorExpected(pos scanner.Position, msg string) { msg = `expected "` + msg + `"` if pos.Offset == p.pos.Offset { // the error happened at the current position; // make the error message more specific msg += ", found " + scanner.TokenString(p.tok) if p.tok < 0 { msg += " " + p.lit } } p.error(pos, msg) } func (p *parser) expect(tok rune) scanner.Position { pos := p.pos if p.tok != tok { p.errorExpected(pos, scanner.TokenString(tok)) } p.next() // make progress in any case return pos } func (p *parser) parseIdentifier() *Name { pos := p.pos name := p.lit p.expect(scanner.Ident) return &Name{pos, name} } func (p *parser) parseToken() *Token { pos := p.pos value := "" if p.tok == scanner.String || p.tok == scanner.RawString { value, _ = strconv.Unquote(p.lit) // Unquote may fail with an error, but only if the scanner found // an illegal string in the first place. In this case the error // has already been reported. p.next() } else { p.expect(scanner.String) } return &Token{pos, value} } // parseTerm returns nil if no term was found. func (p *parser) parseTerm() (x Expression) { pos := p.pos switch p.tok { case scanner.Ident: x = p.parseIdentifier() case scanner.String, scanner.RawString: tok := p.parseToken() x = tok const ellipsis = '…' // U+2026, the horizontal ellipsis character if p.tok == ellipsis { p.next() x = &Range{tok, p.parseToken()} } case '(': p.next() x = &Group{pos, p.parseExpression()} p.expect(')') case '[': p.next() x = &Option{pos, p.parseExpression()} p.expect(']') case '{': p.next() x = &Repetition{pos, p.parseExpression()} p.expect('}') } return x } func (p *parser) parseSequence() Expression { var list Sequence for x := p.parseTerm(); x != nil; x = p.parseTerm() { list = append(list, x) } // no need for a sequence if list.Len() < 2 switch len(list) { case 0: p.errorExpected(p.pos, "term") return &Bad{p.pos, "term expected"} case 1: return list[0] } return list } func (p *parser) parseExpression() Expression { var list Alternative for { list = append(list, p.parseSequence()) if p.tok != '|' { break } p.next() } // len(list) > 0 // no need for an Alternative node if list.Len() < 2 if len(list) == 1 { return list[0] } return list } func (p *parser) parseProduction() *Production { name := p.parseIdentifier() p.expect('=') var expr Expression if p.tok != '.' { expr = p.parseExpression() } p.expect('.') return &Production{name, expr} } func (p *parser) parse(filename string, src io.Reader) Grammar { p.scanner.Init(src) p.scanner.Filename = filename p.next() // initializes pos, tok, lit grammar := make(Grammar) for p.tok != scanner.EOF { prod := p.parseProduction() name := prod.Name.String if _, found := grammar[name]; !found { grammar[name] = prod } else { p.error(prod.Pos(), name+" declared already") } } return grammar } // Parse parses a set of EBNF productions from source src. // It returns a set of productions. Errors are reported // for incorrect syntax and if a production is declared // more than once; the filename is used only for error // positions. func Parse(filename string, src io.Reader) (Grammar, error) { var p parser grammar := p.parse(filename, src) return grammar, p.errors.Err() }