summaryrefslogtreecommitdiffstats
path: root/doc/misc/parsegrammar.py
diff options
context:
space:
mode:
Diffstat (limited to 'doc/misc/parsegrammar.py')
-rw-r--r--doc/misc/parsegrammar.py194
1 files changed, 194 insertions, 0 deletions
diff --git a/doc/misc/parsegrammar.py b/doc/misc/parsegrammar.py
new file mode 100644
index 0000000..b3fede0
--- /dev/null
+++ b/doc/misc/parsegrammar.py
@@ -0,0 +1,194 @@
+############################################################################
+# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+#
+# SPDX-License-Identifier: MPL-2.0
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, you can obtain one at https://mozilla.org/MPL/2.0/.
+#
+# See the COPYRIGHT file distributed with this work for additional
+# information regarding copyright ownership.
+############################################################################
+
+"""
+Read ISC config grammar description produced by "cfg_test --grammar",
+transform it into JSON, and print it to stdout.
+
+Beware: This parser is pretty dumb and heavily depends on cfg_test output
+format. See parse_mapbody() for more details.
+
+Maps are recursively parsed into sub-dicts, all other elements (lists etc.)
+are left intact and returned as one string.
+
+Output example from named.conf grammar showing three variants follow.
+Keys "_flags" and "_id" are present only if non-empty. Key "_grammar" denotes
+end node, key "_mapbody" denotes a nested map.
+
+{
+ "acl": {
+ "_flags": [
+ "may occur multiple times"
+ ],
+ "_grammar": "<string> { <address_match_element>; ... }"
+ },
+ "http": {
+ "_flags": [
+ "may occur multiple times"
+ ],
+ "_id": "<string>",
+ "_mapbody": {
+ "endpoints": {
+ "_grammar": "{ <quoted_string>; ... }"
+ },
+ "streams-per-connection": {
+ "_grammar": "<integer>"
+ }
+ }
+ },
+ "options": {
+ "_mapbody": {
+ "rate-limit": {
+ "_mapbody": {
+ "all-per-second": {
+ "_grammar": "<integer>"
+ }
+ }
+ }
+ }
+ }
+}
+"""
+import fileinput
+import json
+import re
+
+FLAGS = [
+ "may occur multiple times",
+ "obsolete",
+ "deprecated",
+ "experimental",
+ "test only",
+]
+
+KEY_REGEX = re.compile("[a-zA-Z0-9-]+")
+
+
+def split_comments(line):
+ """Split line on comment boundary and strip right-side whitespace.
+ Supports only #, //, and /* comments which end at the end of line.
+ It does NOT handle:
+ - quoted strings
+ - /* comments which do not end at line boundary
+ - multiple /* comments on a single line
+ """
+ assert '"' not in line, 'lines with " are not supported'
+ data_end_idx = len(line)
+ for delimiter in ["#", "//", "/*"]:
+ try:
+ data_end_idx = min(line.index(delimiter), data_end_idx)
+ except ValueError:
+ continue
+ if delimiter == "/*":
+ # sanity checks
+ if not line.rstrip().endswith("*/"):
+ raise NotImplementedError(
+ "unsupported /* comment, does not end at the end of line", line
+ )
+ if "/*" in line[data_end_idx + 1 :]:
+ raise NotImplementedError(
+ "unsupported line with multiple /* comments", line
+ )
+
+ noncomment = line[:data_end_idx]
+ comment = line[data_end_idx:]
+ return noncomment, comment
+
+
+def parse_line(filein):
+ """Consume single line from input, return non-comment and comment."""
+ for line in filein:
+ line, comment = split_comments(line)
+ line = line.strip()
+ comment = comment.strip()
+ if not line:
+ continue
+ yield line, comment
+
+
+def parse_flags(comments):
+ """Extract known flags from comments. Must match exact strings used by cfg_test."""
+ out = []
+ for flag in FLAGS:
+ if flag in comments:
+ out.append(flag)
+ return out
+
+
+def parse_mapbody(filein):
+ """Parse body of a "map" in ISC config format.
+
+ Input lines can be only:
+ - whitespace & comments only -> ignore
+ - <keyword> <anything>; -> store <anything> as "_grammar" for this keyword
+ - <keyword> <anything> { -> parse sub-map and store (optional) <anything> as "_id",
+ producing nested dict under "_mapbody"
+ Also store known strings found at the end of line in "_flags".
+
+ Returns:
+ - tuple (map dict, map comment) when }; line is reached
+ - map dict when we run out of lines without the closing };
+ """
+ thismap = {}
+ for line, comment in parse_line(filein):
+ flags = parse_flags(comment)
+ if line == "};": # end of a nested map
+ return thismap, flags
+
+ # first word - a map key name
+ # beware: some statements do not have parameters, e.g. "null;"
+ key = line.split()[0].rstrip(";")
+ # map key sanity check
+ if not KEY_REGEX.fullmatch(key):
+ raise NotImplementedError("suspicious keyword detected", line)
+
+ # omit keyword from the grammar
+ grammar = line[len(key) :].strip()
+ # also skip final ; or {
+ grammar = grammar[:-1].strip()
+
+ thismap[key] = {}
+ if line.endswith("{"):
+ # nested map, recurse, but keep "extra identifiers" if any
+ try:
+ subkeys, flags = parse_mapbody(filein)
+ except ValueError:
+ raise ValueError("unfinished nested map, missing }; detected") from None
+ if flags:
+ thismap[key]["_flags"] = flags
+ if grammar:
+ # for lines which look like "view <name> {" store "<name>"
+ thismap[key]["_id"] = grammar
+ thismap[key]["_mapbody"] = subkeys
+ else:
+ assert line.endswith(";")
+ if flags:
+ thismap[key]["_flags"] = flags
+ thismap[key]["_grammar"] = grammar
+
+ # Ran out of lines: can happen only on the end of the top-level map-body!
+ # Intentionally do not return second parameter to cause ValueError
+ # if we reach this spot with a missing }; in a nested map.
+ assert len(thismap)
+ return thismap
+
+
+def main():
+ """Read stdin or filename provided on command line"""
+ with fileinput.input() as filein:
+ grammar = parse_mapbody(filein)
+ print(json.dumps(grammar, indent=4))
+
+
+if __name__ == "__main__":
+ main()