diff options
Diffstat (limited to 'doc/misc/parsegrammar.py')
-rw-r--r-- | doc/misc/parsegrammar.py | 194 |
1 files changed, 194 insertions, 0 deletions
############################################################################
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
############################################################################

"""
Read ISC config grammar description produced by "cfg_test --grammar",
transform it into JSON, and print it to stdout.

Beware: This parser is pretty dumb and heavily depends on cfg_test output
format. See parse_mapbody() for more details.

Maps are recursively parsed into sub-dicts, all other elements (lists etc.)
are left intact and returned as one string.

Output example from named.conf grammar showing three variants follows.
Keys "_flags" and "_id" are present only if non-empty. Key "_grammar" denotes
end node, key "_mapbody" denotes a nested map.

{
    "acl": {
        "_flags": [
            "may occur multiple times"
        ],
        "_grammar": "<string> { <address_match_element>; ... }"
    },
    "http": {
        "_flags": [
            "may occur multiple times"
        ],
        "_id": "<string>",
        "_mapbody": {
            "endpoints": {
                "_grammar": "{ <quoted_string>; ... }"
            },
            "streams-per-connection": {
                "_grammar": "<integer>"
            }
        }
    },
    "options": {
        "_mapbody": {
            "rate-limit": {
                "_mapbody": {
                    "all-per-second": {
                        "_grammar": "<integer>"
                    }
                }
            }
        }
    }
}
"""
import fileinput
import json
import re

# Flag strings recognized in trailing comments; parse_flags() reports them
# in this order.
FLAGS = [
    "may occur multiple times",
    "obsolete",
    "deprecated",
    "experimental",
    "test only",
]

# A map key must consist solely of these characters (checked with fullmatch).
KEY_REGEX = re.compile("[a-zA-Z0-9-]+")


def split_comments(line):
    """Split line on comment boundary.

    Supports only #, //, and /* comments which end at the end of line.
    It does NOT handle:
    - quoted strings
    - /* comments which do not end at line boundary
    - multiple /* comments on a single line

    Returns:
    - tuple (non-comment part, comment part); the comment part starts with
      the earliest comment delimiter and is empty if the line has none.
      Neither part is stripped here.

    Raises:
    - AssertionError if the line contains a double quote
    - NotImplementedError for the unsupported /* comment forms listed above
    """
    assert '"' not in line, 'lines with " are not supported'
    data_end_idx = len(line)
    for delimiter in ["#", "//", "/*"]:
        try:
            delimiter_idx = line.index(delimiter)
        except ValueError:
            continue
        # the comment starts at whichever delimiter comes first
        data_end_idx = min(delimiter_idx, data_end_idx)
        if delimiter == "/*":
            # sanity checks
            if not line.rstrip().endswith("*/"):
                raise NotImplementedError(
                    "unsupported /* comment, does not end at the end of line", line
                )
            # Search for a second "/*" after the first one itself, not after
            # data_end_idx: an earlier # or // would otherwise make the very
            # same "/*" look like a duplicate.
            if "/*" in line[delimiter_idx + 1 :]:
                raise NotImplementedError(
                    "unsupported line with multiple /* comments", line
                )

    noncomment = line[:data_end_idx]
    comment = line[data_end_idx:]
    return noncomment, comment


def parse_line(filein):
    """Yield (non-comment, comment) pairs for each input line with content.

    Both parts are stripped of surrounding whitespace. Lines which are empty
    after removing the comment are skipped.
    """
    for line in filein:
        line, comment = split_comments(line)
        line = line.strip()
        comment = comment.strip()
        if not line:
            continue
        yield line, comment


def parse_flags(comments):
    """Extract known flags from comments. Must match exact strings used by cfg_test.

    Returns a list of the FLAGS entries found as substrings of comments,
    in FLAGS order.
    """
    return [flag for flag in FLAGS if flag in comments]


def parse_mapbody(filein):
    """Parse body of a "map" in ISC config format.

    Input lines can be only:
    - whitespace & comments only -> ignore
    - <keyword> <anything>; -> store <anything> as "_grammar" for this keyword
    - <keyword> <anything> { -> parse sub-map and store (optional) <anything> as "_id",
                                producing nested dict under "_mapbody"
    Also store known strings found at the end of line in "_flags".

    filein may be any iterable of lines; it is consumed as a single shared
    iterator so that nested maps continue where the parent stopped.

    Returns:
    - tuple (map dict, flags found on the }; line) when }; line is reached
    - map dict when we run out of lines without the closing };

    Raises:
    - ValueError if a nested map is not terminated by };
    - NotImplementedError if a keyword contains unexpected characters
    - AssertionError if a line ends with neither ; nor {, or if the input
      ends before any keyword was stored in the current map
    """
    # Recursive calls must continue reading where this call stopped. iter()
    # is a no-op for iterators (files, fileinput) and prevents re-iterables
    # such as lists from restarting at the first line on every recursion.
    filein = iter(filein)
    thismap = {}
    for line, comment in parse_line(filein):
        flags = parse_flags(comment)
        if line == "};":  # end of a nested map
            return thismap, flags

        # first word - a map key name
        # beware: some statements do not have parameters, e.g. "null;"
        key = line.split()[0].rstrip(";")
        # map key sanity check
        if not KEY_REGEX.fullmatch(key):
            raise NotImplementedError("suspicious keyword detected", line)

        # omit keyword from the grammar
        grammar = line[len(key) :].strip()
        # also skip final ; or {
        grammar = grammar[:-1].strip()

        thismap[key] = {}
        if line.endswith("{"):
            # nested map, recurse, but keep "extra identifiers" if any
            nested = parse_mapbody(filein)
            # A bare dict means the input ended before the closing };.
            # Check the type explicitly: blindly unpacking a dict with exactly
            # two keys would succeed and silently store garbage.
            if not isinstance(nested, tuple):
                raise ValueError("unfinished nested map, missing }; detected")
            # flags of a nested map come from its closing }; line
            subkeys, flags = nested
            if flags:
                thismap[key]["_flags"] = flags
            if grammar:
                # for lines which look like "view <name> {" store "<name>"
                thismap[key]["_id"] = grammar
            thismap[key]["_mapbody"] = subkeys
        else:
            assert line.endswith(";")
            if flags:
                thismap[key]["_flags"] = flags
            thismap[key]["_grammar"] = grammar

    # Ran out of lines: can happen only on the end of the top-level map-body!
    # Intentionally return a bare dict (no flags) here; the recursive caller
    # turns that into ValueError when a nested map is missing its };.
    assert thismap
    return thismap


def main():
    """Read stdin or filename provided on command line"""
    with fileinput.input() as filein:
        grammar = parse_mapbody(filein)
    print(json.dumps(grammar, indent=4))


if __name__ == "__main__":
    main()