diff options
Diffstat (limited to '')
-rwxr-xr-x | misc/xml-tokens/gen-odf-tokens.py | 192 |
1 files changed, 192 insertions, 0 deletions
diff --git a/misc/xml-tokens/gen-odf-tokens.py b/misc/xml-tokens/gen-odf-tokens.py new file mode 100755 index 0000000..2838604 --- /dev/null +++ b/misc/xml-tokens/gen-odf-tokens.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python +######################################################################## +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +######################################################################## + +import xml.parsers.expat +import sys +import argparse +from pathlib import Path + +import token_util + + +NS_RNG = "http://relaxng.org/ns/structure/1.0" + + +class TokenParser: + + def __init__ (self, strm): + self.__strm = strm + self.__elem = None + self.tokens = set() + + def start_element(self, name, attrs): + self.__elem = name + if name in {f"{NS_RNG}:element", f"{NS_RNG}:attribute"} and "name" in attrs: + tokens = attrs['name'].split(':') + n = len(tokens) + if n != 2: + sys.stderr.write("unrecognized token type: "+attrs['name']) + sys.exit(1) + + self.tokens.add(tokens[1]) + + for token in tokens: + self.tokens.add(token) + + def character(self, data): + if self.__elem == f"{NS_RNG}:value": + s = data.strip() + if len(s) > 0: + self.tokens.add(s) + + def parse(self): + p = xml.parsers.expat.ParserCreate(encoding="utf-8", namespace_separator=":") + p.StartElementHandler = self.start_element + p.CharacterDataHandler = self.character + p.Parse(self.__strm, 1) + + self.tokens = sorted(self.tokens) + + +class NSParser: + + def __init__ (self, strm): + self.__strm = strm + self.__elem = None + self.ns_values = dict() # namespace values + + def start_element(self, name, attrs): + self.__elem = name + if name.endswith("grammar"): + names = attrs.keys() + for name in names: + tokens = name.split(':') + if len(tokens) < 2 or tokens[0] != "xmlns": + continue + + val = attrs[name] + self.ns_values[tokens[1]] = val + + def parse(self): + p = xml.parsers.expat.ParserCreate(encoding="utf-8") + p.StartElementHandler = self.start_element + p.Parse(self.__strm, 1) + + ns_values = list() + for k, v in self.ns_values.items(): + if v == "http://relaxng.org/ns/structure/1.0": + # skip the relaxNG namespace since it is only used in the schema document. + continue + ns_values.append((k, v)) + + self.ns_values = sorted(ns_values, key=lambda x: x[0]) + + +def gen_namespace_tokens(filepath, ns_values): + + # header (.hpp) + filepath_hpp = filepath + "_hpp.inl" + outfile = open(filepath_hpp, 'w') + outfile.write("namespace orcus {\n\n") + for key, _ in ns_values: + outfile.write("extern const xmlns_id_t NS_odf_") + outfile.write(key) + outfile.write(";\n") + outfile.write("\nextern const xmlns_id_t* NS_odf_all;\n") + outfile.write("\n}\n\n") + outfile.close() + + # source (.cpp) + filepath_cpp = filepath + "_cpp.inl" + outfile = open(filepath_cpp, 'w') + outfile.write("namespace orcus {\n\n") + for key, value in ns_values: + outfile.write("const xmlns_id_t NS_odf_") + outfile.write(key) + outfile.write(" = \"") + outfile.write(value) + outfile.write("\"") + outfile.write(";\n") + + outfile.write("\n") + outfile.write("namespace {\n\n") + outfile.write("const xmlns_id_t odf_ns[] = {\n") + for key, _ in ns_values: + outfile.write(" NS_odf_") + outfile.write(key) + outfile.write(",\n") + outfile.write(" nullptr\n") + outfile.write("};\n\n") + outfile.write("} // anonymous\n\n") + + outfile.write("const xmlns_id_t* NS_odf_all = odf_ns;\n\n") + + outfile.write("}\n\n") + outfile.close() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--ns-file-prefix", type=str, + help="file name prefix for optioal namespace constant files") + parser.add_argument( + "--summary-output", type=Path, + help="optional output file to write collected token data summary") + parser.add_argument( + "--token-constants", type=Path, + help="path to C++ output file where token consants are to be written to") + parser.add_argument( + "--token-names", type=Path, + help="path to C++ output file where token names are to be written to") + parser.add_argument( + "odf_schema", metavar="ODF-SCHEMA", type=Path, help="path to RNG ODF schema file") + args = parser.parse_args() + + if not args.odf_schema.is_file(): + print(f"{args.odf_schema} is not a valid file.", file=sys.stderr) + sys.exit(1) + + schema_content = args.odf_schema.read_text() + parser = TokenParser(schema_content) + parser.parse() + tokens = parser.tokens + + parser = NSParser(schema_content) + parser.parse() + ns_values = parser.ns_values + + if args.summary_output: + summary_content_buf = list() + summary_content_buf.append("list of tokens:") + + for token in tokens: + summary_content_buf.append(f"- \"{token}\"") + + summary_content_buf.append("list of namespaces:") + + for ns, value in ns_values: + summary_content_buf.append(f"- {ns}: \"{value}\"") + + args.summary_output.write_text("\n".join(summary_content_buf)) + + if args.token_constants: + with open(args.token_constants, "w") as f: + token_util.gen_token_constants(f, tokens) + + if args.token_names: + with open(args.token_names, "w") as f: + token_util.gen_token_names(f, tokens) + + if args.ns_file_prefix is not None: + gen_namespace_tokens(args.ns_file_prefix, ns_values) + + +if __name__ == '__main__': + main() |