diff options
Diffstat (limited to 'scripts/docparser.py')
-rw-r--r-- | scripts/docparser.py | 51 |
1 files changed, 51 insertions, 0 deletions
"""Parse PostgreSQL SGML reference pages into a JSON command-help mapping.

Each ``*.sgml`` file in the given directory is parsed; the first two
non-blank lines of its ``refsect1`` element (when the first one is the
"Description" heading) and the text of its ``synopsis`` element are collected
under the upper-cased command name derived from the file name.
"""
import json
import os
import sys


def get_section(doc, section):
    """Return the text of the first ``section`` element in ``doc``.

    ``doc`` is any object exposing ``find(name)`` whose result exposes
    ``get_text()`` (a parsed BeautifulSoup document in this script).
    Returns ``None`` when no such element is found.
    """
    element = doc.find(section)
    if element:
        return element.get_text()
    return None


def get_description(doc):
    """Return the "Description" heading plus its first line of text.

    Looks at the first ``refsect1`` element of ``doc``. Returns the first two
    non-blank lines joined by a newline, provided the first one is exactly
    ``"Description"``; returns ``None`` otherwise.
    """
    text = get_section(doc, "refsect1")
    if not text:
        return None

    # Build a real list: on Python 3 ``filter`` returns a lazy iterator,
    # which supports neither ``len()`` nor indexing.
    lines = [line for line in text.split("\n") if line.strip()]

    if len(lines) > 1 and lines[0] == "Description":
        return lines[0] + "\n" + lines[1]
    return None


def parse(file_name):
    """Parse one SGML reference file.

    Returns ``{"description": ..., "synopsis": ...}`` when both parts are
    present and non-empty, otherwise ``None``.
    """
    # Imported here so the usage text and the text helpers above keep working
    # even when bs4 is not installed.
    from bs4 import BeautifulSoup

    with open(file_name, "r") as sgml_file:
        doc = BeautifulSoup(sgml_file.read(), "html.parser")

    desc = get_description(doc)
    synopsis = get_section(doc, "synopsis")
    if desc and synopsis:
        return {"description": desc, "synopsis": synopsis}
    return None


def main(argv=None):
    """Command-line entry point.

    ``argv`` defaults to ``sys.argv``. With no directory argument the usage
    text is printed and the process exits with status 0; otherwise every
    ``*.sgml`` file in the directory is parsed and the resulting mapping is
    printed to stdout as JSON.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        print("Parse postgres SGML reference files into JSON")
        print("Usage:")
        print(
            'echo -n "commands = " > command_help.py; python docparser.py ref/ | python -mjson.tool | sed \'s/"\\: null/": None/g\' >> command_help.py'
        )
        print("")
        sys.exit(0)

    ref_dir = args[1]
    docs = {}

    # os.listdir order is arbitrary; sort so the generated output is stable.
    for file_name in sorted(os.listdir(ref_dir)):
        if not file_name.endswith(".sgml"):
            continue
        # "alter_table.sgml" -> "ALTER TABLE"
        command = file_name[: -len(".sgml")].replace("_", " ")
        parsed = parse(os.path.join(ref_dir, file_name))
        if parsed:
            docs[command.upper()] = parsed
    print(json.dumps(docs))


if __name__ == "__main__":
    main()