summaryrefslogtreecommitdiffstats
path: root/doc/genlist-from-docbooks.py
blob: f094e09e4f50b799fa4a8642997518358a220e7e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/python3
import glob
from lxml import etree

# Files matching 'standard-*.xml' in the current directory. They are skipped
# when listing pages and are the only hrefs recorded as dependencies.
exclude_list = glob.glob('standard-*.xml')

# Single parser instance reused for every file; blank text is dropped.
PARSER = etree.XMLParser(remove_blank_text=True)


def extract_data(fname):
    """Return the manual section and shared-file dependencies of a page.

    Parses the XML file *fname* with the module-level ``PARSER``.

    Args:
        fname: Path of the XML file to parse.

    Returns:
        A ``(manvolnum, deps)`` tuple. ``manvolnum`` is the text of the
        ``refmeta/manvolnum`` element, or the string ``'0'`` when that
        element is missing or empty. ``deps`` is a sorted list of the
        ``href`` values, taken from elements carrying both ``href`` and
        ``xpointer`` attributes, that appear in ``exclude_list``.
    """
    tree = etree.parse(fname, PARSER)

    # Always return a string here: the value is later joined with other
    # strings, so an int default (or None from an empty element) would
    # raise TypeError.
    node = tree.find('./refmeta/manvolnum')
    manvolnum = node.text if node is not None and node.text else '0'

    deps = set()
    for elem in tree.iter():
        # Fetch the attributes by name instead of by position so the result
        # does not depend on the order they are written in the XML.
        href = elem.get('href')
        if href is None or elem.get('xpointer') is None:
            continue
        if href in exclude_list:
            deps.add(href)

    # Set iteration order varies between runs; sort for a stable result.
    return manvolnum, sorted(deps)


# Build one "stem,manvolnum,fname,deps" record per page and print all of
# them on a single line separated by ';'.
output = []
# glob order depends on the filesystem; sort so the output is reproducible.
file_list = sorted(glob.glob('*.xml'))
for fname in file_list:
    if fname in exclude_list:
        continue
    stem = fname[:-4]  # strip the '.xml' suffix matched by the glob
    manvolnum, deps = extract_data(fname)
    # Dependencies are ':'-separated; the literal 'None' marks "no deps".
    deps = ':'.join(deps) if deps else 'None'
    # str.join accepts only strings; coerce in case manvolnum is not one.
    output.append(','.join([stem, str(manvolnum), fname, deps]))

print(';'.join(output))