summaryrefslogtreecommitdiffstats
path: root/scripts/docparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/docparser.py')
-rw-r--r--scripts/docparser.py51
1 files changed, 51 insertions, 0 deletions
diff --git a/scripts/docparser.py b/scripts/docparser.py
new file mode 100644
index 0000000..a6019c9
--- /dev/null
+++ b/scripts/docparser.py
@@ -0,0 +1,51 @@
+from bs4 import BeautifulSoup
+import sys
+import os
+import json
+
+
+def get_section(doc, section):
+ element = doc.find(section)
+ if element:
+ return element.get_text()
+
+
+def get_description(doc):
+ text = get_section(doc, "refsect1")
+ if text:
+ lines = filter(lambda x: x.strip(), text.split("\n"))
+
+ if len(lines) > 1 and lines[0] == "Description":
+ return lines[0] + "\n" + lines[1]
+
+
+def parse(file_name):
+ with open(file_name, "r") as file:
+ doc = BeautifulSoup(file.read(), "html.parser")
+ desc = get_description(doc)
+ synopsis = get_section(doc, "synopsis")
+ if desc and synopsis:
+ return {"description": desc, "synopsis": synopsis}
+
+
+if __name__ == "__main__":
+ if len(sys.argv) < 2:
+ print("Parse postgres SGML reference files into JSON")
+ print("Usage:")
+ print(
+ 'echo -n "commands = " > command_help.py; python parser.py ref/ | python -mjson.tool | sed \'s/"\\: null/": None/g\' >> command_help.py'
+ )
+ print("")
+ sys.exit(0)
+
+ dir = sys.argv[1]
+ docs = {}
+
+ for file_name in os.listdir(dir):
+ if file_name.endswith(".sgml"):
+ path = dir.rstrip("/") + "/" + file_name
+ command = file_name[:-5].replace("_", " ")
+ parsed = parse(path)
+ if parsed:
+ docs[command.upper()] = parsed
+ print(json.dumps(docs))