blob: f094e09e4f50b799fa4a8642997518358a220e7e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
#!/usr/bin/python3
import glob
from lxml import etree
# Files in the current directory matching 'standard-*.xml'. The main loop
# skips these files, and extract_data() reports an href as a dependency only
# when it appears in this list.
exclude_list = glob.glob('standard-*.xml')

# Parser instance reused for every etree.parse() call in this script.
PARSER = etree.XMLParser(remove_blank_text=True)
def extract_data(fname):
    """Parse one XML file and collect its manvolnum and include dependencies.

    Args:
        fname: Path of the XML file to parse with the module-level PARSER.

    Returns:
        A ``(manvolnum, deps)`` tuple. ``manvolnum`` is the text of the
        ``./refmeta/manvolnum`` element, or the string ``'0'`` when that
        element is missing or has no text. ``deps`` is a sorted list of the
        ``href`` values found on elements that carry both an ``href`` and an
        ``xpointer`` attribute, limited to values present in ``exclude_list``.
    """
    tree = etree.parse(fname, PARSER)

    node = tree.find('./refmeta/manvolnum')
    # Always hand back a string: the caller feeds this value to str.join(),
    # which raises TypeError for an int fallback or a None text.
    if node is not None and node.text is not None:
        manvolnum = node.text
    else:
        manvolnum = '0'

    deps = set()
    for elem in tree.iter():
        # Look the attribute up by name instead of taking the first attribute
        # value, so the result does not depend on attribute order.
        href = elem.get('href')
        if href is None or elem.get('xpointer') is None:
            continue
        if href in exclude_list:
            deps.add(href)

    # Set iteration order for strings varies between runs; sort so the
    # returned list is reproducible.
    return manvolnum, sorted(deps)
# Build one "stem,manvolnum,fname,deps" record per XML file in the current
# directory (skipping the files in exclude_list) and print all records on a
# single line separated by ';'.
output = []
# glob.glob() returns matches in arbitrary order; sort so the printed line is
# the same on every run.
file_list = sorted(glob.glob('*.xml'))
for fname in file_list:
    if fname in exclude_list:
        continue
    stem = fname[0:-4]  # drop the trailing '.xml'
    manvolnum, deps = extract_data(fname)
    # Dependencies are ':'-separated; the literal 'None' marks "no deps".
    deps = ':'.join(deps) if deps else 'None'
    # str() guards against a non-string manvolnum, since str.join() raises
    # TypeError on any non-str item.
    output.append(','.join([stem, str(manvolnum), fname, deps]))
print(';'.join(output))
|