// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package ebnf is a library for EBNF grammars. The input is text ([]byte) // satisfying the following grammar (represented itself in EBNF): // // Production = name "=" [ Expression ] "." . // Expression = Alternative { "|" Alternative } . // Alternative = Term { Term } . // Term = name | token [ "…" token ] | Group | Option | Repetition . // Group = "(" Expression ")" . // Option = "[" Expression "]" . // Repetition = "{" Expression "}" . // // A name is a Go identifier, a token is a Go string, and comments // and white space follow the same rules as for the Go language. // Production names starting with an uppercase Unicode letter denote // non-terminal productions (i.e., productions which allow white-space // and comments between tokens); all other production names denote // lexical productions. package ebnf // import "golang.org/x/exp/ebnf" import ( "errors" "fmt" "text/scanner" "unicode" "unicode/utf8" ) // ---------------------------------------------------------------------------- // Error handling type errorList []error func (list errorList) Err() error { if len(list) == 0 { return nil } return list } func (list errorList) Error() string { switch len(list) { case 0: return "no errors" case 1: return list[0].Error() } return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1) } func newError(pos scanner.Position, msg string) error { return errors.New(fmt.Sprintf("%s: %s", pos, msg)) } // ---------------------------------------------------------------------------- // Internal representation type ( // An Expression node represents a production expression. Expression interface { // Pos is the position of the first character of the syntactic construct Pos() scanner.Position } // An Alternative node represents a non-empty list of alternative expressions. Alternative []Expression // x | y | z // A Sequence node represents a non-empty list of sequential expressions. Sequence []Expression // x y z // A Name node represents a production name. Name struct { StringPos scanner.Position String string } // A Token node represents a literal. Token struct { StringPos scanner.Position String string } // A List node represents a range of characters. Range struct { Begin, End *Token // begin ... end } // A Group node represents a grouped expression. Group struct { Lparen scanner.Position Body Expression // (body) } // An Option node represents an optional expression. Option struct { Lbrack scanner.Position Body Expression // [body] } // A Repetition node represents a repeated expression. Repetition struct { Lbrace scanner.Position Body Expression // {body} } // A Production node represents an EBNF production. Production struct { Name *Name Expr Expression } // A Bad node stands for pieces of source code that lead to a parse error. Bad struct { TokPos scanner.Position Error string // parser error message } // A Grammar is a set of EBNF productions. The map // is indexed by production name. // Grammar map[string]*Production ) func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences func (x *Name) Pos() scanner.Position { return x.StringPos } func (x *Token) Pos() scanner.Position { return x.StringPos } func (x *Range) Pos() scanner.Position { return x.Begin.Pos() } func (x *Group) Pos() scanner.Position { return x.Lparen } func (x *Option) Pos() scanner.Position { return x.Lbrack } func (x *Repetition) Pos() scanner.Position { return x.Lbrace } func (x *Production) Pos() scanner.Position { return x.Name.Pos() } func (x *Bad) Pos() scanner.Position { return x.TokPos } // ---------------------------------------------------------------------------- // Grammar verification func isLexical(name string) bool { ch, _ := utf8.DecodeRuneInString(name) return !unicode.IsUpper(ch) } type verifier struct { errors errorList worklist []*Production reached Grammar // set of productions reached from (and including) the root production grammar Grammar } func (v *verifier) error(pos scanner.Position, msg string) { v.errors = append(v.errors, newError(pos, msg)) } func (v *verifier) push(prod *Production) { name := prod.Name.String if _, found := v.reached[name]; !found { v.worklist = append(v.worklist, prod) v.reached[name] = prod } } func (v *verifier) verifyChar(x *Token) rune { s := x.String if utf8.RuneCountInString(s) != 1 { v.error(x.Pos(), "single char expected, found "+s) return 0 } ch, _ := utf8.DecodeRuneInString(s) return ch } func (v *verifier) verifyExpr(expr Expression, lexical bool) { switch x := expr.(type) { case nil: // empty expression case Alternative: for _, e := range x { v.verifyExpr(e, lexical) } case Sequence: for _, e := range x { v.verifyExpr(e, lexical) } case *Name: // a production with this name must exist; // add it to the worklist if not yet processed if prod, found := v.grammar[x.String]; found { v.push(prod) } else { v.error(x.Pos(), "missing production "+x.String) } // within a lexical production references // to non-lexical productions are invalid if lexical && !isLexical(x.String) { v.error(x.Pos(), "reference to non-lexical production "+x.String) } case *Token: // nothing to do for now case *Range: i := v.verifyChar(x.Begin) j := v.verifyChar(x.End) if i >= j { v.error(x.Pos(), "decreasing character range") } case *Group: v.verifyExpr(x.Body, lexical) case *Option: v.verifyExpr(x.Body, lexical) case *Repetition: v.verifyExpr(x.Body, lexical) case *Bad: v.error(x.Pos(), x.Error) default: panic(fmt.Sprintf("internal error: unexpected type %T", expr)) } } func (v *verifier) verify(grammar Grammar, start string) { // find root production root, found := grammar[start] if !found { var noPos scanner.Position v.error(noPos, "no start production "+start) return } // initialize verifier v.worklist = v.worklist[0:0] v.reached = make(Grammar) v.grammar = grammar // work through the worklist v.push(root) for { n := len(v.worklist) - 1 if n < 0 { break } prod := v.worklist[n] v.worklist = v.worklist[0:n] v.verifyExpr(prod.Expr, isLexical(prod.Name.String)) } // check if all productions were reached if len(v.reached) < len(v.grammar) { for name, prod := range v.grammar { if _, found := v.reached[name]; !found { v.error(prod.Pos(), name+" is unreachable") } } } } // Verify checks that: // - all productions used are defined // - all productions defined are used when beginning at start // - lexical productions refer only to other lexical productions // // Position information is interpreted relative to the file set fset. func Verify(grammar Grammar, start string) error { var v verifier v.verify(grammar, start) return v.errors.Err() }