diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/README.rst | 21 | ||||
-rw-r--r-- | scripts/docparser.py | 51 |
2 files changed, 72 insertions, 0 deletions
diff --git a/scripts/README.rst b/scripts/README.rst new file mode 100644 index 0000000..6d96afd --- /dev/null +++ b/scripts/README.rst @@ -0,0 +1,21 @@ +Scripts +-------------------------- + +**docparser.py** + +Parses SGML files containing Postgres command information and converts them +into a JSON data structure, which is then converted into a Python dictionary +and saved to `pgspecial/help/commands.py` + +This should be run manually and the results committed after each new release +of the main Postgres project. + +SGML files can be found at: https://github.com/postgres/postgres/tree/master/doc/src/sgml/ref +Grab a copy of this directory on your local system. + +**Usage** + +:: + pip install beautifulsoup4 + # From root of project + echo -n "helpcommands = " > pgspecial/help/commands.py; python scripts/docparser.py ref/ | python -mjson.tool | sed 's/"\: null/": None/g' >> pgspecial/help/commands.py diff --git a/scripts/docparser.py b/scripts/docparser.py new file mode 100644 index 0000000..a6019c9 --- /dev/null +++ b/scripts/docparser.py @@ -0,0 +1,51 @@ +from bs4 import BeautifulSoup +import sys +import os +import json + + +def get_section(doc, section): + element = doc.find(section) + if element: + return element.get_text() + + +def get_description(doc): + text = get_section(doc, "refsect1") + if text: + lines = filter(lambda x: x.strip(), text.split("\n")) + + if len(lines) > 1 and lines[0] == "Description": + return lines[0] + "\n" + lines[1] + + +def parse(file_name): + with open(file_name, "r") as file: + doc = BeautifulSoup(file.read(), "html.parser") + desc = get_description(doc) + synopsis = get_section(doc, "synopsis") + if desc and synopsis: + return {"description": desc, "synopsis": synopsis} + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Parse postgres SGML reference files into JSON") + print("Usage:") + print( + 'echo -n "commands = " > command_help.py; python parser.py ref/ | python -mjson.tool | sed \'s/"\\: null/": None/g\' >> 
command_help.py' + ) + print("") + sys.exit(0) + + dir = sys.argv[1] + docs = {} + + for file_name in os.listdir(dir): + if file_name.endswith(".sgml"): + path = dir.rstrip("/") + "/" + file_name + command = file_name[:-5].replace("_", " ") + parsed = parse(path) + if parsed: + docs[command.upper()] = parsed + print(json.dumps(docs)) |