summary refs log tree commit diff stats
path: root/misc/xml-tokens/gen-odf-tokens.py
diff options
context:
space:
mode:
Diffstat (limited to 'misc/xml-tokens/gen-odf-tokens.py')
-rwxr-xr-x misc/xml-tokens/gen-odf-tokens.py 192
1 files changed, 192 insertions, 0 deletions
diff --git a/misc/xml-tokens/gen-odf-tokens.py b/misc/xml-tokens/gen-odf-tokens.py
new file mode 100755
index 0000000..2838604
--- /dev/null
+++ b/misc/xml-tokens/gen-odf-tokens.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import xml.parsers.expat
+import sys
+import argparse
+from pathlib import Path
+
+import token_util
+
+
# Namespace URI of RELAX NG; expat reports namespaced element names as
# "<uri>:<local-name>" when the parser is created with namespace_separator=":".
NS_RNG = "http://relaxng.org/ns/structure/1.0"


class TokenParser:
    """Collect token strings from a RELAX NG schema document.

    Tokens come from two places:

    * the ``name`` attribute of RELAX NG ``element`` and ``attribute``
      definitions, which must have the form ``prefix:local``; both the prefix
      and the local part are recorded as tokens;
    * the whitespace-stripped text content of RELAX NG ``value`` elements.

    While parsing, ``tokens`` is a set; once :meth:`parse` returns it is a
    sorted list of the unique tokens.
    """

    # Elements whose "name" attribute contributes tokens.
    _NAMED_ELEMENTS = frozenset((f"{NS_RNG}:element", f"{NS_RNG}:attribute"))
    # Element whose text content contributes a token.
    _VALUE_ELEMENT = f"{NS_RNG}:value"

    def __init__(self, strm):
        """Store the schema content *strm* (the XML document text) to parse."""
        self.__strm = strm
        self.__elem = None  # name of the element currently being read
        self.tokens = set()

    def start_element(self, name, attrs):
        """Record the tokens carried by an element/attribute definition.

        Terminates the program with exit status 1 when a ``name`` attribute
        is not of the form ``prefix:local``.
        """
        self.__elem = name
        if name not in self._NAMED_ELEMENTS or "name" not in attrs:
            return

        parts = attrs["name"].split(":")
        if len(parts) != 2:
            sys.stderr.write("unrecognized token type: " + attrs["name"] + "\n")
            sys.exit(1)

        # Both the namespace prefix and the local name become tokens.
        self.tokens.update(parts)

    def end_element(self, name):
        """Forget the current element so trailing text is not attributed to it."""
        self.__elem = None

    def character(self, data):
        """Record the non-empty, stripped text of a ``value`` element."""
        if self.__elem != self._VALUE_ELEMENT:
            return

        text = data.strip()
        if text:
            self.tokens.add(text)

    def parse(self):
        """Parse the schema and replace ``tokens`` with a sorted list."""
        p = xml.parsers.expat.ParserCreate(encoding="utf-8", namespace_separator=":")
        # Expat may hand over one element's text in several pieces (for
        # instance around entity references); buffering joins them so that a
        # value is recorded as a single token rather than as fragments.
        p.buffer_text = True
        p.StartElementHandler = self.start_element
        p.EndElementHandler = self.end_element
        p.CharacterDataHandler = self.character
        p.Parse(self.__strm, True)

        self.tokens = sorted(self.tokens)
+
+
class NSParser:
    """Collect the namespace prefixes declared on ``grammar`` elements.

    While parsing, ``ns_values`` is a dict mapping prefix to namespace URI.
    Once :meth:`parse` returns it is a list of ``(prefix, uri)`` tuples sorted
    by prefix, with the RELAX NG namespace itself left out.
    """

    def __init__(self, strm):
        """Store the schema content *strm* (the XML document text) to parse."""
        self.__strm = strm
        self.ns_values = dict()  # namespace values

    def start_element(self, name, attrs):
        """Record every ``xmlns:<prefix>`` attribute of a ``grammar`` element."""
        # This parser runs without namespace processing, so the reported name
        # may still carry a prefix (e.g. "rng:grammar"). Compare the local
        # part only, so unrelated names that merely end in "grammar" are not
        # picked up.
        if name.rpartition(":")[2] != "grammar":
            return

        for attr_name, attr_value in attrs.items():
            parts = attr_name.split(":")
            if len(parts) < 2 or parts[0] != "xmlns":
                # not a prefixed namespace declaration
                continue

            self.ns_values[parts[1]] = attr_value

    def parse(self):
        """Parse the schema and replace ``ns_values`` with a sorted list."""
        p = xml.parsers.expat.ParserCreate(encoding="utf-8")
        p.StartElementHandler = self.start_element
        p.Parse(self.__strm, True)

        # Skip the RELAX NG namespace since it is only used in the schema
        # document itself.
        declared = [
            (prefix, uri)
            for prefix, uri in self.ns_values.items()
            if uri != "http://relaxng.org/ns/structure/1.0"
        ]

        self.ns_values = sorted(declared, key=lambda item: item[0])
+
+
def gen_namespace_tokens(filepath, ns_values):
    """Write the C++ include fragments declaring the ODF namespace constants.

    Two files are produced, named after the prefix *filepath*:

    * ``<filepath>_hpp.inl`` with ``extern`` declarations, and
    * ``<filepath>_cpp.inl`` with the definitions plus a ``nullptr``-terminated
      array of all constants exposed as ``NS_odf_all``.

    *filepath* may be a string or a path-like object.  *ns_values* is a
    sequence of ``(prefix, uri)`` pairs; it is iterated more than once.
    """

    # header (.hpp)
    hpp_parts = ["namespace orcus {\n\n"]
    hpp_parts.extend(
        f"extern const xmlns_id_t NS_odf_{key};\n" for key, _ in ns_values)
    hpp_parts.append("\nextern const xmlns_id_t* NS_odf_all;\n")
    hpp_parts.append("\n}\n\n")

    # source (.cpp)
    cpp_parts = ["namespace orcus {\n\n"]
    cpp_parts.extend(
        f"const xmlns_id_t NS_odf_{key} = \"{value}\";\n"
        for key, value in ns_values)
    cpp_parts.append("\n")
    cpp_parts.append("namespace {\n\n")
    cpp_parts.append("const xmlns_id_t odf_ns[] = {\n")
    cpp_parts.extend(f" NS_odf_{key},\n" for key, _ in ns_values)
    cpp_parts.append(" nullptr\n")
    cpp_parts.append("};\n\n")
    cpp_parts.append("} // anonymous\n\n")
    cpp_parts.append("const xmlns_id_t* NS_odf_all = odf_ns;\n\n")
    cpp_parts.append("}\n\n")

    # Each file is written in one go inside a context manager so that it is
    # closed even when writing fails.
    with open(f"{filepath}_hpp.inl", "w", encoding="utf-8") as outfile:
        outfile.write("".join(hpp_parts))

    with open(f"{filepath}_cpp.inl", "w", encoding="utf-8") as outfile:
        outfile.write("".join(cpp_parts))
+
+
def main():
    """Command-line entry point.

    Parses the RELAX NG ODF schema given on the command line, collects its
    tokens and namespace declarations, and writes whichever of the optional
    outputs (summary, token constants, token names, namespace constant files)
    were requested.  Exits with status 1 when the schema path is not a file.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--ns-file-prefix", type=str,
        help="file name prefix for optional namespace constant files")
    arg_parser.add_argument(
        "--summary-output", type=Path,
        help="optional output file to write collected token data summary")
    arg_parser.add_argument(
        "--token-constants", type=Path,
        help="path to C++ output file where token constants are to be written to")
    arg_parser.add_argument(
        "--token-names", type=Path,
        help="path to C++ output file where token names are to be written to")
    arg_parser.add_argument(
        "odf_schema", metavar="ODF-SCHEMA", type=Path, help="path to RNG ODF schema file")
    args = arg_parser.parse_args()

    if not args.odf_schema.is_file():
        print(f"{args.odf_schema} is not a valid file.", file=sys.stderr)
        sys.exit(1)

    # Both parsers force UTF-8, so decode the schema as UTF-8 rather than with
    # whatever the platform's locale encoding happens to be.
    schema_content = args.odf_schema.read_text(encoding="utf-8")

    token_parser = TokenParser(schema_content)
    token_parser.parse()
    tokens = token_parser.tokens

    ns_parser = NSParser(schema_content)
    ns_parser.parse()
    ns_values = ns_parser.ns_values

    if args.summary_output:
        summary_lines = ["list of tokens:"]
        summary_lines.extend(f"- \"{token}\"" for token in tokens)
        summary_lines.append("list of namespaces:")
        summary_lines.extend(f"- {ns}: \"{value}\"" for ns, value in ns_values)

        args.summary_output.write_text("\n".join(summary_lines), encoding="utf-8")

    if args.token_constants:
        with open(args.token_constants, "w", encoding="utf-8") as f:
            token_util.gen_token_constants(f, tokens)

    if args.token_names:
        with open(args.token_names, "w", encoding="utf-8") as f:
            token_util.gen_token_names(f, tokens)

    if args.ns_file_prefix is not None:
        gen_namespace_tokens(args.ns_file_prefix, ns_values)


if __name__ == '__main__':
    main()