summaryrefslogtreecommitdiffstats
path: root/tools/make-wsluarm.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/make-wsluarm.py')
-rwxr-xr-xtools/make-wsluarm.py458
1 files changed, 458 insertions, 0 deletions
diff --git a/tools/make-wsluarm.py b/tools/make-wsluarm.py
new file mode 100755
index 00000000..52330756
--- /dev/null
+++ b/tools/make-wsluarm.py
@@ -0,0 +1,458 @@
+#!/usr/bin/env python3
+#
+# make-wsluarm.py
+#
+# By Gerald Combs <gerald@wireshark.org>
+# Based on make-wsluarm.pl by Luis E. Garcia Onatnon <luis.ontanon@gmail.com> and Hadriel Kaplan
+#
+# Wireshark - Network traffic analyzer
+# By Gerald Combs <gerald@wireshark.org>
+# Copyright 1998 Gerald Combs
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+'''\
+WSLUA's Reference Manual Generator
+
+This reads Doxygen-style comments in C code and generates wslua API documentation
+formatted as AsciiDoc.
+
+Behavior as documented by Hadriel:
+- Allows modules (i.e., WSLUA_MODULE) to have detailed descriptions
+- Two (or more) line breaks in comments result in separate paragraphs
+- Any indent with a single leading star '*' followed by space is a bulleted list item
+ reducing indent or having an extra linebreak stops the list
+- Any indent with a leading digits-dot followed by space, i.e. "1. ", is a numbered list item
+ reducing indent or having an extra linebreak stops the list
+'''
+
+import argparse
+import logging
+import os
+import re
+import sys
+
+from enum import Enum
+from string import Template
+
+def parse_desc(description):
+ '''\
+Break up descriptions based on newlines and keywords. Some processing
+is done for code blocks and lists, but the output is otherwise left
+intact. Assumes the input has been stripped.
+'''
+
+ c_lines = description.strip().splitlines()
+
+ if len(c_lines) < 1:
+ return ''
+
+ adoc_lines = []
+ cli = iter(c_lines)
+ for line in cli:
+ raw_len = len(line)
+ line = line.lstrip()
+ indent = raw_len - len(line)
+
+ # If we find "[source,...]" then treat it as a block
+ if re.search(r'\[source.*\]', line):
+ # The next line *should* be a delimiter...
+ block_delim = next(cli).strip()
+ line += f'\n{block_delim}\n'
+ block_line = next(cli)
+ # XXX try except StopIteration
+ while block_line.strip() != block_delim:
+ # Keep eating lines until the closing delimiter.
+ # XXX Strip indent spaces?
+ line += block_line + '\n'
+ block_line = next(cli)
+ line += block_delim + '\n'
+
+ adoc_lines.append(line)
+ elif re.match(r'^\s*$', line):
+ # line is either empty or just whitespace, and we're not in a @code block
+ # so it's the end of a previous paragraph, beginning of new one
+ adoc_lines.append('')
+ else:
+ # We have a regular line, not in a @code block.
+ # Add it as-is.
+
+ # if line starts with "@version" or "@since", make it a "Since:"
+ if re.match(r'^@(version|since)\s+', line):
+ line = re.sub(r'^@(version|since)\s+', 'Since: ', line)
+ adoc_lines.append(line)
+
+ # If line starts with single "*" and space, leave it mostly intact.
+ elif re.match(r'^\*\s', line):
+ adoc_lines += ['', line]
+ # keep eating until we find a blank line or end
+ line = next(cli)
+ try:
+ while not re.match(r'^\s*$', line):
+ raw_len = len(line)
+ line = line.lstrip()
+ # if this is less indented than before, break out
+ if raw_len - len(line) < indent:
+ break
+ adoc_lines += [line]
+ line = next(cli)
+ except StopIteration:
+ pass
+ adoc_lines.append('')
+
+ # if line starts with "1." and space, leave it mostly intact.
+ elif re.match(r'^1\.\s', line):
+ adoc_lines += ['', line]
+ # keep eating until we find a blank line or end
+ line = next(cli)
+ try:
+ while not re.match(r'^\s*$', line):
+ raw_len = len(line)
+ line = line.lstrip()
+ # if this is less indented than before, break out
+ if raw_len - len(line) < indent:
+ break
+ adoc_lines += [line]
+ line = next(cli)
+ except StopIteration:
+ pass
+ adoc_lines.append('')
+
+ # Just a normal line, add it to array
+ else:
+ # Nested Lua arrays
+ line = re.sub(r'\[\[(.*)\]\]', r'$$\1$$', line)
+ adoc_lines += [line]
+
+ # Strip out consecutive empty lines.
+ # This isn't strictly necessary but makes the AsciiDoc output prettier.
+ adoc_lines = '\n'.join(adoc_lines).splitlines()
+ adoc_lines = [val for idx, val in enumerate(adoc_lines) if idx == 0 or not (val == '' and val == adoc_lines[idx - 1])]
+
+ return '\n'.join(adoc_lines)
+
+
+class LuaFunction:
+ def __init__(self, c_file, id, start, name, raw_description):
+ self.c_file = c_file
+ self.id = id
+ self.start = start
+ self.name = name
+ if not raw_description:
+ raw_description = ''
+ self.description = parse_desc(raw_description)
+ self.arguments = [] # (name, description, optional)
+ self.returns = [] # description
+ self.errors = [] # description
+ logging.info(f'Created function {id} ({name}) at {start}')
+
+ def add_argument(self, id, raw_name, raw_description, raw_optional):
+ if id != self.id:
+ logging.critical(f'Invalid argument ID {id} in function {self.id}')
+ sys.exit(1)
+ if not raw_description:
+ raw_description = ''
+ optional = False
+ if raw_optional == 'OPT':
+ optional = True
+ self.arguments.append((raw_name.lower(), parse_desc(raw_description), optional))
+
+ def extract_buf(self, buf):
+ "Extract arguments, errors, and return values from a function's buffer."
+
+ # Splits "WSLUA_OPTARG_ProtoField_int8_NAME /* food */" into
+ # "OPT" (1), "ProtoField_int8" (2), "NAME" (3), ..., ..., " food " (6)
+ # Handles functions like "loadfile(filename)" too.
+ for m in re.finditer(r'#define WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s+\d+' + TRAILING_COMMENT_RE, buf, re.MULTILINE|re.DOTALL):
+ self.add_argument(m.group(2), m.group(3), m.group(6), m.group(1))
+ logging.info(f'Created arg {m.group(3)} for {self.id} at {m.start()}')
+
+ # Same as above, except that there is no macro but a (multi-line) comment.
+ for m in re.finditer(r'/\*\s*WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s*(.*?)\*/', buf, re.MULTILINE|re.DOTALL):
+ self.add_argument(m.group(2), m.group(3), m.group(4), m.group(1))
+ logging.info(f'Created arg {m.group(3)} for {self.id} at {m.start()}')
+
+ for m in re.finditer(r'/\*\s+WSLUA_MOREARGS\s+([A-Za-z_]+)\s+(.*?)\*/', buf, re.MULTILINE|re.DOTALL):
+ self.add_argument(m.group(1), '...', m.group(2), False)
+ logging.info(f'Created morearg for {self.id}')
+
+ for m in re.finditer(r'WSLUA_(FINAL_)?RETURN\(\s*.*?\s*\)\s*;' + TRAILING_COMMENT_RE, buf, re.MULTILINE|re.DOTALL):
+ if m.group(4) and len(m.group(4)) > 0:
+ self.returns.append(m.group(4).strip())
+ logging.info(f'Created return for {self.id} at {m.start()}')
+
+ for m in re.finditer(r'/\*\s*_WSLUA_RETURNS_\s*(.*?)\*/', buf, re.MULTILINE|re.DOTALL):
+ if m.group(1) and len(m.group(1)) > 0:
+ self.returns.append(m.group(1).strip())
+ logging.info(f'Created return for {self.id} at {m.start()}')
+
+ for m in re.finditer(r'WSLUA_ERROR\s*\(\s*(([A-Z][A-Za-z]+)_)?([a-z_]+),' + QUOTED_RE, buf, re.MULTILINE|re.DOTALL):
+ self.errors.append(m.group(4).strip())
+ logging.info(f'Created error {m.group(4)[:10]} for {self.id} at {m.start()}')
+
+ def to_adoc(self):
+ # The Perl script wrapped optional args in '[]', joined them with ', ', and
+ # converted non-alphabetic characters to underscores.
+ mangled_names = [f'_{a}_' if optional else a for a, _, optional in self.arguments]
+ section_name = re.sub('[^A-Za-z0-9]', '_', f'{self.name}_{"__".join(mangled_names)}_')
+ opt_names = [f'[{a}]' if optional else a for a, _, optional in self.arguments]
+ adoc_buf = f'''
+// {self.c_file}
+[#lua_fn_{section_name}]
+===== {self.name}({', '.join(opt_names)})
+
+{self.description}
+'''
+ if len(self.arguments) > 0:
+ adoc_buf += '''
+[float]
+===== Arguments
+'''
+ for (name, description, optional) in self.arguments:
+ if optional:
+ name += ' (optional)'
+ adoc_buf += f'\n{name}::\n'
+
+ if len(description) > 0:
+ adoc_buf += f'\n{description}\n'
+
+ adoc_buf += f'\n// function_arg_footer: {name}'
+
+ if len(self.arguments) > 0:
+ adoc_buf += '\n// end of function_args\n'
+
+ if len(self.returns) > 0:
+ adoc_buf += '''
+[float]
+===== Returns
+'''
+ for description in self.returns:
+ adoc_buf += f'\n{description}\n'
+
+ if len(self.returns) > 0:
+ adoc_buf += f'\n// function_returns_footer: {self.name}'
+
+ if len(self.errors) > 0:
+ adoc_buf += '''
+[float]
+===== Errors
+'''
+ for description in self.errors:
+ adoc_buf += f'\n* {description}\n'
+
+ if len(self.errors) > 0:
+ adoc_buf += f'\n// function_errors_footer: {self.name}'
+
+ adoc_buf += f'\n// function_footer: {section_name}\n'
+
+ return adoc_buf
+
+
+# group 1: whole trailing comment (possibly empty), e.g. " /* foo */"
+# group 2: any leading whitespace. XXX why is this not removed using (?:...)
+# group 3: actual comment text, e.g. " foo ".
+TRAILING_COMMENT_RE = r'((\s*|[\n\r]*)/\*(.*?)\*/)?'
+IN_COMMENT_RE = r'[\s\r\n]*((.*?)\s*\*/)?'
+QUOTED_RE = r'"([^"]*)"'
+
+# XXX We might want to create a "LuaClass" class similar to LuaFunction
+# and move these there.
+def extract_class_definitions(c_file, c_buf, module, classes, functions):
+ for m in re.finditer(r'WSLUA_CLASS_DEFINE(?:_BASE)?\(\s*([A-Z][a-zA-Z0-9]+).*?\);' + TRAILING_COMMENT_RE, c_buf, re.MULTILINE|re.DOTALL):
+ raw_desc = m.group(4)
+ if raw_desc is None:
+ raw_desc = ''
+ name = m.group(1)
+ mod_class = {
+ 'description': parse_desc(raw_desc),
+ 'constructors': [],
+ 'methods': [],
+ 'attributes': [],
+ }
+ classes[name] = mod_class
+ logging.info(f'Created class {name}')
+ return 0
+
+def extract_function_definitions(c_file, c_buf, module, classes, functions):
+ for m in re.finditer(r'WSLUA_FUNCTION\s+wslua_([a-z_0-9]+)[^\{]*\{' + TRAILING_COMMENT_RE, c_buf, re.MULTILINE|re.DOTALL):
+ id = m.group(1)
+ functions[id] = LuaFunction(c_file, id, m.start(), id, m.group(4))
+
+def extract_constructor_definitions(c_file, c_buf, module, classes, functions):
+ for m in re.finditer(r'WSLUA_CONSTRUCTOR\s+([A-Za-z0-9]+)_([a-z0-9_]+).*?\{' + TRAILING_COMMENT_RE, c_buf, re.MULTILINE|re.DOTALL):
+ class_name = m.group(1)
+ id = f'{class_name}_{m.group(2)}'
+ name = f'{class_name}.{m.group(2)}'
+ functions[id] = LuaFunction(c_file, id, m.start(), name, m.group(5))
+ classes[class_name]['constructors'].append(id)
+
+def extract_constructor_markups(c_file, c_buf, module, classes, functions):
+ for m in re.finditer(r'_WSLUA_CONSTRUCTOR_\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s*(.*?)\*/', c_buf, re.MULTILINE|re.DOTALL):
+ class_name = m.group(1)
+ id = f'{class_name}_{m.group(2)}'
+ name = f'{class_name}.{m.group(2)}'
+ functions[id] = LuaFunction(c_file, id, m.start(), name, m.group(3))
+ classes[class_name]['constructors'].append(id)
+
+def extract_method_definitions(c_file, c_buf, module, classes, functions):
+ for m in re.finditer(r'WSLUA_METHOD\s+([A-Za-z0-9]+)_([a-z0-9_]+)[^\{]*\{' + TRAILING_COMMENT_RE, c_buf, re.MULTILINE|re.DOTALL):
+ class_name = m.group(1)
+ id = f'{class_name}_{m.group(2)}'
+ name = f'{class_name.lower()}:{m.group(2)}'
+ functions[id] = LuaFunction(c_file, id, m.start(), name, m.group(5))
+ classes[class_name]['methods'].append(id)
+
+def extract_metamethod_definitions(c_file, c_buf, module, classes, functions):
+ for m in re.finditer(r'WSLUA_METAMETHOD\s+([A-Za-z0-9]+)(__[a-z0-9]+)[^\{]*\{' + TRAILING_COMMENT_RE, c_buf, re.MULTILINE|re.DOTALL):
+ class_name = m.group(1)
+ id = f'{class_name}{m.group(2)}'
+ name = f'{class_name.lower()}:{m.group(2)}'
+ functions[id] = LuaFunction(c_file, id, m.start(), name, m.group(5))
+ classes[class_name]['methods'].append(id)
+
+def extract_attribute_markups(c_file, c_buf, module, classes, functions):
+ for m in re.finditer(r'/\*\s+WSLUA_ATTRIBUTE\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s+([A-Z]*)\s*(.*?)\*/', c_buf, re.MULTILINE|re.DOTALL):
+ class_name = m.group(1)
+ name = f'{m.group(1).lower()}.{m.group(2)}'
+ mode = m.group(3)
+ mode_desc = 'Mode: '
+ if 'RO' in mode:
+ mode_desc += 'Retrieve only.\n'
+ elif 'WO' in mode:
+ mode_desc += 'Assign only.\n'
+ elif 'RW' in mode or 'WR' in mode:
+ mode_desc += 'Retrieve or assign.\n'
+ else:
+ sys.stderr.write(f'Attribute does not have a RO/WO/RW mode {mode}\n')
+ sys.exit(1)
+
+ attribute = {
+ 'name': name,
+ 'description': parse_desc(f'{mode_desc}\n{m.group(4)}'),
+ }
+ classes[class_name]['attributes'].append(attribute)
+ logging.info(f'Created attribute {name} for class {class_name}')
+
+def main():
+ parser = argparse.ArgumentParser(description="WSLUA's Reference Manual Generator")
+ parser.add_argument("c_files", nargs='+', metavar='C file', help="C file")
+ parser.add_argument('--output-directory', help='Output directory')
+ parser.add_argument('--verbose', action='store_true', help='Show more output')
+ args = parser.parse_args()
+
+ logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG if args.verbose else logging.WARNING)
+
+ modules = {}
+
+ for c_file in args.c_files:
+ with open(c_file, encoding='utf-8') as c_f:
+ c_buf = c_f.read()
+
+ # Peek for modules vs continuations.
+ m = re.search(r'WSLUA_(|CONTINUE_)MODULE\s*(\w+)', c_buf)
+ if m:
+ module_name = m.group(2)
+ c_pair = (os.path.basename(c_file), c_buf)
+ try:
+ if m.group(1) == 'CONTINUE_':
+ modules[module_name]['c'].append(c_pair)
+ else:
+ modules[module_name]['c'].insert(0, c_pair)
+ except KeyError:
+ modules[module_name] = {}
+ modules[module_name]['c'] = [c_pair]
+ modules[module_name]['file_base'] = os.path.splitext(c_pair[0])[0]
+ else:
+ logging.warning(f'No module found in {c_file}')
+
+ extractors = [
+ extract_class_definitions,
+ extract_function_definitions,
+ extract_constructor_definitions,
+ extract_constructor_markups,
+ extract_method_definitions,
+ extract_metamethod_definitions,
+ extract_attribute_markups,
+ ]
+
+ for module_name in sorted(modules):
+ adoc_file = f'{modules[module_name]["file_base"]}.adoc'
+ logging.info(f'Writing module {module_name} to {adoc_file} from {len(modules[module_name]["c"])} input(s)')
+ functions = {}
+ classes = {}
+
+ # Extract our module's description.
+ m = re.search(r'WSLUA_MODULE\s*[A-Z][a-zA-Z0-9]+' + IN_COMMENT_RE, modules[module_name]['c'][0][1], re.MULTILINE|re.DOTALL)
+ if not m:
+ return
+ modules[module_name]['description'] = parse_desc(f'{m.group(2)}')
+
+ # Extract module-level information from each file.
+ for (c_file, c_buf) in modules[module_name]['c']:
+ for extractor in extractors:
+ extractor(c_file, c_buf, modules[module_name], classes, functions)
+
+ # Extract function-level information from each file.
+ for (c_file, c_buf) in modules[module_name]['c']:
+ c_file_ids = filter(lambda k: functions[k].c_file == c_file, functions.keys())
+ func_ids = sorted(c_file_ids, key=lambda k: functions[k].start)
+ id = func_ids.pop(0)
+ for next_id in func_ids:
+ functions[id].extract_buf(c_buf[functions[id].start:functions[next_id].start])
+ id = next_id
+ functions[id].extract_buf(c_buf[functions[id].start:])
+
+ with open(os.path.join(args.output_directory, adoc_file), 'w', encoding='utf-8') as adoc_f:
+ adoc_f.write(f'''\
+// {c_file}
+[#lua_module_{module_name}]
+=== {modules[module_name]["description"]}
+''')
+ for class_name in sorted(classes.keys()):
+ lua_class = classes[class_name]
+ adoc_f.write(f'''
+// {c_file}
+[#lua_class_{class_name}]
+==== {class_name}
+''')
+
+ if not lua_class["description"] == '':
+ adoc_f.write(f'\n{lua_class["description"]}\n')
+
+ for constructor_id in sorted(lua_class['constructors'], key=lambda id: functions[id].start):
+ adoc_f.write(functions[constructor_id].to_adoc())
+ del functions[constructor_id]
+
+ for method_id in sorted(lua_class['methods'], key=lambda id: functions[id].start):
+ adoc_f.write(functions[method_id].to_adoc())
+ del functions[method_id]
+
+ for attribute in lua_class['attributes']:
+ attribute_id = re.sub('[^A-Za-z0-9]', '_', f'{attribute["name"]}')
+ adoc_f.write(f'''
+[#lua_class_attrib_{attribute_id}]
+===== {attribute["name"]}
+
+{attribute["description"]}
+
+// End {attribute["name"]}
+''')
+
+
+ adoc_f.write(f'\n// class_footer: {class_name}\n')
+
+ if len(functions.keys()) > 0:
+ adoc_f.write(f'''\
+[#global_functions_{module_name}]
+==== Global Functions
+''')
+
+ for global_id in sorted(functions.keys(), key=lambda id: functions[id].start):
+ adoc_f.write(functions[global_id].to_adoc())
+
+ if len(functions.keys()) > 0:
+ adoc_f.write(f'// Global function\n')
+
+ adoc_f.write('// end of module\n')
+
+if __name__ == '__main__':
+ main()