diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/callgraph-dot.py | 494 | ||||
-rw-r--r-- | python/clidef.py | 494 | ||||
-rw-r--r-- | python/clippy/__init__.py | 92 | ||||
-rw-r--r-- | python/clippy/elf.py | 613 | ||||
-rw-r--r-- | python/clippy/uidhash.py | 61 | ||||
-rw-r--r-- | python/firstheader.py | 77 | ||||
-rw-r--r-- | python/makefile.py | 217 | ||||
-rw-r--r-- | python/makevars.py | 100 | ||||
-rw-r--r-- | python/runtests.py | 16 | ||||
-rw-r--r-- | python/test_xrelfo.py | 55 | ||||
-rw-r--r-- | python/tiabwarfo.py | 225 | ||||
-rw-r--r-- | python/xref2vtysh.py | 388 | ||||
-rw-r--r-- | python/xrefstructs.json | 140 | ||||
-rw-r--r-- | python/xrelfo.py | 526 |
14 files changed, 3498 insertions, 0 deletions
diff --git a/python/callgraph-dot.py b/python/callgraph-dot.py new file mode 100644 index 0000000..4e58b19 --- /dev/null +++ b/python/callgraph-dot.py @@ -0,0 +1,494 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# callgraph json to graphviz generator for FRR +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. + +import re +import sys +import json + + +class FunctionNode(object): + funcs = {} + + def __init__(self, name): + super().__init__() + FunctionNode.funcs[name] = self + + self.name = name + self.out = [] + self.inb = [] + self.rank = None + self.defined = False + self.defs = [] + + def __repr__(self): + return '<"%s()" rank=%r>' % (self.name, self.rank) + + def define(self, attrs): + self.defined = True + self.defs.append((attrs["filename"], attrs["line"])) + return self + + def add_call(self, called, attrs): + return CallEdge(self, called, attrs) + + def calls(self): + for e in self.out: + yield e.o + + def calld(self): + for e in self.inb: + yield e.i + + def unlink(self, other): + self.out = list([edge for edge in self.out if edge.o != other]) + other.inb = list([edge for edge in other.inb if edge.i != other]) + + @classmethod + def get(cls, name): + if name in cls.funcs: + return cls.funcs[name] + return FunctionNode(name) + + +class CallEdge(object): + def __init__(self, i, o, attrs): + self.i = i + self.o = o + self.is_external = attrs["is_external"] + self.attrs = attrs + + i.out.append(self) + o.inb.append(self) + + def __repr__(self): + return '<"%s()" -> "%s()">' % (self.i.name, self.o.name) + + +def nameclean(n): + if "." 
in n: + return n.split(".", 1)[0] + return n + + +def calc_rank(queue, direction): + nextq = queue + + if direction == 1: + aggr = max + elem = lambda x: x.calls() + else: + aggr = min + elem = lambda x: x.calld() + + currank = direction + cont = True + + while len(nextq) > 0 and cont: + queue = nextq + nextq = [] + + # sys.stderr.write('rank %d\n' % currank) + + cont = False + + for node in queue: + if not node.defined: + node.rank = 0 + continue + + rank = direction + for other in elem(node): + if other is node: + continue + if other.rank is None: + nextq.append(node) + break + rank = aggr(rank, other.rank + direction) + else: + cont = True + node.rank = rank + + currank += direction + + return nextq + + +class Graph(dict): + class Subgraph(set): + def __init__(self): + super().__init__() + + class NodeGroup(set): + def __init__(self, members): + super().__init__(members) + + class Node(object): + def __init__(self, graph, fn): + super().__init__() + self._fn = fn + self._fns = [fn] + self._graph = graph + self._calls = set() + self._calld = set() + self._group = None + + def __repr__(self): + return '<Graph.Node "%s()"/%d>' % (self._fn.name, len(self._fns)) + + def __hash__(self): + return hash(self._fn.name) + + def _finalize(self): + for called in self._fn.calls(): + if called.name == self._fn.name: + continue + if called.name in self._graph: + self._calls.add(self._graph[called.name]) + self._graph[called.name]._calld.add(self) + + def unlink(self, other): + self._calls.remove(other) + other._calld.remove(self) + + @property + def name(self): + return self._fn.name + + def calls(self): + return self._calls + + def calld(self): + return self._calld + + def group(self, members): + assert self in members + + pregroups = [] + for g in [m._group for m in members]: + if g is None: + continue + if g in pregroups: + continue + + assert g <= members + pregroups.append(g) + + if len(pregroups) == 0: + group = self._graph.NodeGroup(members) + 
self._graph._groups.append(group) + elif len(pregroups) == 1: + group = pregroups[0] + group |= members + else: + for g in pregroups: + self._graph._groups.remove(g) + group = self._graph.NodeGroup(members) + self._graph._groups.append(group) + + for m in members: + m._group = group + return group + + def merge(self, other): + self._fns.extend(other._fns) + self._calls = (self._calls | other._calls) - {self, other} + self._calld = (self._calld | other._calld) - {self, other} + for c in other._calls: + if c == self: + continue + c._calld.remove(other) + c._calld.add(self) + for c in other._calld: + if c == self: + continue + c._calls.remove(other) + c._calls.add(self) + del self._graph[other._fn.name] + + def __init__(self, funcs): + super().__init__() + self._funcs = funcs + for fn in funcs: + self[fn.name] = self.Node(self, fn) + for node in self.values(): + node._finalize() + self._groups = [] + + def automerge(self): + nodes = list(self.values()) + + while len(nodes): + node = nodes.pop(0) + + candidates = {node} + evalset = set(node.calls()) + prevevalset = None + + while prevevalset != evalset: + prevevalset = evalset + evalset = set() + + for evnode in prevevalset: + inbound = set(evnode.calld()) + if inbound <= candidates: + candidates.add(evnode) + evalset |= set(evnode.calls()) - candidates + else: + evalset.add(evnode) + + # if len(candidates) > 1: + # for candidate in candidates: + # if candidate != node: + # #node.merge(candidate) + # if candidate in nodes: + # nodes.remove(candidate) + node.group(candidates) + + for candidate in candidates: + if candidate in nodes: + nodes.remove(candidate) + + def calc_subgraphs(self): + nodes = list(self.values()) + self._subgraphs = [] + up = {} + down = {} + + self._linear_nodes = [] + + while len(nodes): + sys.stderr.write("%d\n" % len(nodes)) + node = nodes.pop(0) + + down[node] = set() + queue = [node] + while len(queue): + now = queue.pop() + down[node].add(now) + for calls in now.calls(): + if calls in 
down[node]: + continue + queue.append(calls) + + up[node] = set() + queue = [node] + while len(queue): + now = queue.pop() + up[node].add(now) + for calld in now.calld(): + if calld in up[node]: + continue + queue.append(calld) + + common = up[node] & down[node] + + if len(common) == 1: + self._linear_nodes.append(node) + else: + sg = self.Subgraph() + sg |= common + self._subgraphs.append(sg) + for n in common: + if n != node: + nodes.remove(n) + + return self._subgraphs, self._linear_nodes + + +with open(sys.argv[1], "r") as fd: + data = json.load(fd) + +extra_info = { + # zebra - LSP WQ + ("lsp_processq_add", "work_queue_add"): [ + "lsp_process", + "lsp_processq_del", + "lsp_processq_complete", + ], + # zebra - main WQ + ("mq_add_handler", "work_queue_add"): [ + "meta_queue_process", + ], + ("meta_queue_process", "work_queue_add"): [ + "meta_queue_process", + ], + # bgpd - label pool WQ + ("bgp_lp_get", "work_queue_add"): [ + "lp_cbq_docallback", + ], + ("bgp_lp_event_chunk", "work_queue_add"): [ + "lp_cbq_docallback", + ], + ("bgp_lp_event_zebra_up", "work_queue_add"): [ + "lp_cbq_docallback", + ], + # bgpd - main WQ + ("bgp_process", "work_queue_add"): [ + "bgp_process_wq", + "bgp_processq_del", + ], + ("bgp_add_eoiu_mark", "work_queue_add"): [ + "bgp_process_wq", + "bgp_processq_del", + ], + # clear node WQ + ("bgp_clear_route_table", "work_queue_add"): [ + "bgp_clear_route_node", + "bgp_clear_node_queue_del", + "bgp_clear_node_complete", + ], + # rfapi WQs + ("rfapi_close", "work_queue_add"): [ + "rfapi_deferred_close_workfunc", + ], + ("rfapiRibUpdatePendingNode", "work_queue_add"): [ + "rfapiRibDoQueuedCallback", + "rfapiRibQueueItemDelete", + ], +} + + +for func, fdata in data["functions"].items(): + func = nameclean(func) + fnode = FunctionNode.get(func).define(fdata) + + for call in fdata["calls"]: + if call.get("type") in [None, "unnamed", "thread_sched"]: + if call.get("target") is None: + continue + tgt = nameclean(call["target"]) + 
fnode.add_call(FunctionNode.get(tgt), call) + for fptr in call.get("funcptrs", []): + fnode.add_call(FunctionNode.get(nameclean(fptr)), call) + if tgt == "work_queue_add": + if (func, tgt) not in extra_info: + sys.stderr.write( + "%s:%d:%s(): work_queue_add() not handled\n" + % (call["filename"], call["line"], func) + ) + else: + attrs = dict(call) + attrs.update({"is_external": False, "type": "workqueue"}) + for dst in extra_info[func, tgt]: + fnode.add_call(FunctionNode.get(dst), call) + elif call["type"] == "install_element": + vty_node = FunctionNode.get("VTY_NODE_%d" % call["vty_node"]) + vty_node.add_call(FunctionNode.get(nameclean(call["target"])), call) + elif call["type"] == "hook": + # TODO: edges for hooks from data['hooks'] + pass + +n = FunctionNode.funcs + +# fix some very low end functions cycling back very far to the top +if "peer_free" in n: + n["peer_free"].unlink(n["bgp_timer_set"]) + n["peer_free"].unlink(n["bgp_addpath_set_peer_type"]) +if "bgp_path_info_extra_free" in n: + n["bgp_path_info_extra_free"].rank = 0 + +if "zlog_ref" in n: + n["zlog_ref"].rank = 0 +if "mt_checkalloc" in n: + n["mt_checkalloc"].rank = 0 + +queue = list(FunctionNode.funcs.values()) +queue = calc_rank(queue, 1) +queue = calc_rank(queue, -1) + +sys.stderr.write("%d functions in cyclic set\n" % len(queue)) + +graph = Graph(queue) +graph.automerge() + +gv_nodes = [] +gv_edges = [] + +sys.stderr.write("%d groups after automerge\n" % len(graph._groups)) + + +def is_vnc(n): + return n.startswith("rfapi") or n.startswith("vnc") or ("_vnc_" in n) + + +_vncstyle = ',fillcolor="#ffffcc",style=filled' +cyclic_set_names = set([fn.name for fn in graph.values()]) + +for i, group in enumerate(graph._groups): + if len(group) > 1: + group.num = i + gv_nodes.append("\tsubgraph cluster_%d {" % i) + gv_nodes.append("\t\tcolor=blue;") + for gn in group: + has_cycle_callers = set(gn.calld()) - group + has_ext_callers = ( + set([edge.i.name for edge in gn._fn.inb]) - cyclic_set_names + ) + + 
style = "" + etext = "" + if is_vnc(gn.name): + style += _vncstyle + if has_cycle_callers: + style += ",color=blue,penwidth=3" + if has_ext_callers: + style += ',fillcolor="#ffeebb",style=filled' + etext += '<br/><font point-size="10">(%d other callers)</font>' % ( + len(has_ext_callers) + ) + + gv_nodes.append( + '\t\t"%s" [shape=box,label=<%s%s>%s];' + % (gn.name, "<br/>".join([fn.name for fn in gn._fns]), etext, style) + ) + gv_nodes.append("\t}") + else: + for gn in group: + has_ext_callers = ( + set([edge.i.name for edge in gn._fn.inb]) - cyclic_set_names + ) + + style = "" + etext = "" + if is_vnc(gn.name): + style += _vncstyle + if has_ext_callers: + style += ',fillcolor="#ffeebb",style=filled' + etext += '<br/><font point-size="10">(%d other callers)</font>' % ( + len(has_ext_callers) + ) + gv_nodes.append( + '\t"%s" [shape=box,label=<%s%s>%s];' + % (gn.name, "<br/>".join([fn.name for fn in gn._fns]), etext, style) + ) + +edges = set() +for gn in graph.values(): + for calls in gn.calls(): + if gn._group == calls._group: + gv_edges.append( + '\t"%s" -> "%s" [color="#55aa55",style=dashed];' % (gn.name, calls.name) + ) + else: + + def xname(nn): + if len(nn._group) > 1: + return "cluster_%d" % nn._group.num + else: + return nn.name + + tup = xname(gn), calls.name + if tup[0] != tup[1] and tup not in edges: + gv_edges.append('\t"%s" -> "%s" [weight=0.0,w=0.0,color=blue];' % tup) + edges.add(tup) + +with open(sys.argv[2], "w") as fd: + fd.write( + """digraph { + node [fontsize=13,fontname="Fira Sans"]; +%s +}""" + % "\n".join(gv_nodes + [""] + gv_edges) + ) diff --git a/python/clidef.py b/python/clidef.py new file mode 100644 index 0000000..244a820 --- /dev/null +++ b/python/clidef.py @@ -0,0 +1,494 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# FRR CLI preprocessor (DEFPY) +# +# Copyright (C) 2017 David Lamparter for NetDEF, Inc. 
+ +import clippy, traceback, sys, os +from collections import OrderedDict +from functools import reduce +from pprint import pprint +from string import Template +from io import StringIO + +# the various handlers generate output C code for a particular type of +# CLI token, choosing the most useful output C type. + + +class RenderHandler(object): + def __init__(self, token): + pass + + def combine(self, other): + if type(self) == type(other): + return other + return StringHandler(None) + + deref = "" + drop_str = False + canfail = True + canassert = False + + +class StringHandler(RenderHandler): + argtype = "const char *" + decl = Template("const char *$varname = NULL;") + code = Template( + "$varname = (argv[_i]->type == WORD_TKN) ? argv[_i]->text : argv[_i]->arg;" + ) + drop_str = True + canfail = False + canassert = True + + +class LongHandler(RenderHandler): + argtype = "long" + decl = Template("long $varname = 0;") + code = Template( + """\ +char *_end; +$varname = strtol(argv[_i]->arg, &_end, 10); +_fail = (_end == argv[_i]->arg) || (*_end != '\\0');""" + ) + + +class AsDotHandler(RenderHandler): + argtype = "as_t" + decl = Template("as_t $varname = 0;") + code = Template("_fail = !asn_str2asn(argv[_i]->arg, &$varname);") + + +# A.B.C.D/M (prefix_ipv4) and +# X:X::X:X/M (prefix_ipv6) are "compatible" and can merge into a +# struct prefix: + + +class PrefixBase(RenderHandler): + def combine(self, other): + if type(self) == type(other): + return other + if isinstance(other, PrefixBase): + return PrefixGenHandler(None) + return StringHandler(None) + + deref = "&" + + +class Prefix4Handler(PrefixBase): + argtype = "const struct prefix_ipv4 *" + decl = Template("struct prefix_ipv4 $varname = { };") + code = Template("_fail = !str2prefix_ipv4(argv[_i]->arg, &$varname);") + + +class Prefix6Handler(PrefixBase): + argtype = "const struct prefix_ipv6 *" + decl = Template("struct prefix_ipv6 $varname = { };") + code = Template("_fail = !str2prefix_ipv6(argv[_i]->arg, 
&$varname);") + + +class PrefixEthHandler(PrefixBase): + argtype = "struct prefix_eth *" + decl = Template("struct prefix_eth $varname = { };") + code = Template("_fail = !str2prefix_eth(argv[_i]->arg, &$varname);") + + +class PrefixGenHandler(PrefixBase): + argtype = "const struct prefix *" + decl = Template("struct prefix $varname = { };") + code = Template("_fail = !str2prefix(argv[_i]->arg, &$varname);") + + +# same for IP addresses. result is union sockunion. +class IPBase(RenderHandler): + def combine(self, other): + if type(self) == type(other): + return other + if type(other) in [IP4Handler, IP6Handler, IPGenHandler]: + return IPGenHandler(None) + return StringHandler(None) + + +class IP4Handler(IPBase): + argtype = "struct in_addr" + decl = Template("struct in_addr $varname = { INADDR_ANY };") + code = Template("_fail = !inet_aton(argv[_i]->arg, &$varname);") + + +class IP6Handler(IPBase): + argtype = "struct in6_addr" + decl = Template("struct in6_addr $varname = {};") + code = Template("_fail = !inet_pton(AF_INET6, argv[_i]->arg, &$varname);") + + +class IPGenHandler(IPBase): + argtype = "const union sockunion *" + decl = Template( + """union sockunion s__$varname = { .sa.sa_family = AF_UNSPEC }, *$varname = NULL;""" + ) + code = Template( + """\ +if (argv[_i]->text[0] == 'X') { + s__$varname.sa.sa_family = AF_INET6; + _fail = !inet_pton(AF_INET6, argv[_i]->arg, &s__$varname.sin6.sin6_addr); + $varname = &s__$varname; +} else { + s__$varname.sa.sa_family = AF_INET; + _fail = !inet_aton(argv[_i]->arg, &s__$varname.sin.sin_addr); + $varname = &s__$varname; +}""" + ) + canassert = True + + +def mix_handlers(handlers): + def combine(a, b): + if a is None: + return b + return a.combine(b) + + return reduce(combine, handlers, None) + + +handlers = { + "WORD_TKN": StringHandler, + "VARIABLE_TKN": StringHandler, + "RANGE_TKN": LongHandler, + "IPV4_TKN": IP4Handler, + "IPV4_PREFIX_TKN": Prefix4Handler, + "IPV6_TKN": IP6Handler, + "IPV6_PREFIX_TKN": 
Prefix6Handler, + "MAC_TKN": PrefixEthHandler, + "MAC_PREFIX_TKN": PrefixEthHandler, + "ASNUM_TKN": AsDotHandler, +} + +# core template invoked for each occurence of DEFPY. +# +# the "#if $..." bits are there to keep this template unified into one +# common form, without requiring a more advanced template engine (e.g. +# jinja2) +templ = Template( + """$cond_begin/* $fnname => "$cmddef" */ +DEFUN_CMD_FUNC_DECL($fnname) +#define funcdecl_$fnname static int ${fnname}_magic(\\ + const struct cmd_element *self __attribute__ ((unused)),\\ + struct vty *vty __attribute__ ((unused)),\\ + int argc __attribute__ ((unused)),\\ + struct cmd_token *argv[] __attribute__ ((unused))$argdefs) +funcdecl_$fnname; +DEFUN_CMD_FUNC_TEXT($fnname) +{ +#if $nonempty /* anything to parse? */ + int _i; +#if $canfail /* anything that can fail? */ + unsigned _fail = 0, _failcnt = 0; +#endif +$argdecls + for (_i = 0; _i < argc; _i++) { + if (!argv[_i]->varname) + continue; +#if $canfail /* anything that can fail? */ + _fail = 0; +#endif +$argblocks +#if $canfail /* anything that can fail? */ + if (_fail) + vty_out (vty, "%% invalid input for %s: %s\\n", + argv[_i]->varname, argv[_i]->arg); + _failcnt += _fail; +#endif + } +#if $canfail /* anything that can fail? 
*/ + if (_failcnt) + return CMD_WARNING; +#endif +#endif +$argassert + return ${fnname}_magic(self, vty, argc, argv$arglist); +} +$cond_end +""" +) + +# invoked for each named parameter +argblock = Template( + """ + if (!strcmp(argv[_i]->varname, \"$varname\")) {$strblock + $code + }""" +) + + +def get_always_args(token, always_args, args=[], stack=[]): + if token in stack: + return + if token.type == "END_TKN": + for arg in list(always_args): + if arg not in args: + always_args.remove(arg) + return + + stack = stack + [token] + if token.type in handlers and token.varname is not None: + args = args + [token.varname] + for nexttkn in token.next(): + get_always_args(nexttkn, always_args, args, stack) + + +class Macros(dict): + def __init__(self): + super().__init__() + self._loc = {} + + def load(self, filename): + filedata = clippy.parse(filename) + for entry in filedata["data"]: + if entry["type"] != "PREPROC": + continue + self.load_preproc(filename, entry) + + def setup(self, key, val, where="built-in"): + self[key] = val + self._loc[key] = (where, 0) + + def load_preproc(self, filename, entry): + ppdir = entry["line"].lstrip().split(None, 1) + if ppdir[0] != "define" or len(ppdir) != 2: + return + ppdef = ppdir[1].split(None, 1) + name = ppdef[0] + if "(" in name: + return + val = ppdef[1] if len(ppdef) == 2 else "" + + val = val.strip(" \t\n\\") + if self.get(name, val) != val: + sys.stderr.write( + "%s:%d: warning: macro %s redefined!\n" + % ( + filename, + entry["lineno"], + name, + ) + ) + sys.stderr.write( + "%s:%d: note: previously defined here\n" + % ( + self._loc[name][0], + self._loc[name][1], + ) + ) + else: + self[name] = val + self._loc[name] = (filename, entry["lineno"]) + + +def process_file(fn, ofd, dumpfd, all_defun, macros): + errors = 0 + filedata = clippy.parse(fn) + + cond_stack = [] + + for entry in filedata["data"]: + if entry["type"] == "PREPROC": + line = entry["line"].lstrip() + tokens = line.split(maxsplit=1) + line = "#" + line + "\n" 
+ + if not tokens: + continue + + if tokens[0] in ["if", "ifdef", "ifndef"]: + cond_stack.append(line) + elif tokens[0] in ["elif", "else"]: + prev_line = cond_stack.pop(-1) + cond_stack.append(prev_line + line) + elif tokens[0] in ["endif"]: + cond_stack.pop(-1) + elif tokens[0] in ["define"]: + if not cond_stack: + macros.load_preproc(fn, entry) + elif len(cond_stack) == 1 and cond_stack[0] == "#ifdef CLIPPY\n": + macros.load_preproc(fn, entry) + continue + if entry["type"].startswith("DEFPY") or ( + all_defun and entry["type"].startswith("DEFUN") + ): + if len(entry["args"][0]) != 1: + sys.stderr.write( + "%s:%d: DEFPY function name not parseable (%r)\n" + % (fn, entry["lineno"], entry["args"][0]) + ) + errors += 1 + continue + + cmddef = entry["args"][2] + cmddefx = [] + for i in cmddef: + while i in macros: + i = macros[i] + if i.startswith('"') and i.endswith('"'): + cmddefx.append(i[1:-1]) + continue + + sys.stderr.write( + "%s:%d: DEFPY command string not parseable (%r)\n" + % (fn, entry["lineno"], cmddef) + ) + errors += 1 + cmddefx = None + break + if cmddefx is None: + continue + cmddef = "".join([i for i in cmddefx]) + + graph = clippy.Graph(cmddef) + args = OrderedDict() + always_args = set() + for token, depth in clippy.graph_iterate(graph): + if token.type not in handlers: + continue + if token.varname is None: + continue + arg = args.setdefault(token.varname, []) + arg.append(handlers[token.type](token)) + always_args.add(token.varname) + + get_always_args(graph.first(), always_args) + + # print('-' * 76) + # pprint(entry) + # clippy.dump(graph) + # pprint(args) + + params = {"cmddef": cmddef, "fnname": entry["args"][0][0]} + argdefs = [] + argdecls = [] + arglist = [] + argblocks = [] + argassert = [] + doc = [] + canfail = 0 + + def do_add(handler, basename, varname, attr=""): + argdefs.append(",\\\n\t%s %s%s" % (handler.argtype, varname, attr)) + argdecls.append( + "\t%s\n" + % ( + handler.decl.substitute({"varname": varname}).replace( + "\n", 
"\n\t" + ) + ) + ) + arglist.append(", %s%s" % (handler.deref, varname)) + if basename in always_args and handler.canassert: + argassert.append( + """\tif (!%s) { +\t\tvty_out(vty, "Internal CLI error [%%s]\\n", "%s"); +\t\treturn CMD_WARNING; +\t}\n""" + % (varname, varname) + ) + if attr == "": + at = handler.argtype + if not at.startswith("const "): + at = ". . . " + at + doc.append( + "\t%-26s %s %s" + % (at, "alw" if basename in always_args else "opt", varname) + ) + + for varname in args.keys(): + handler = mix_handlers(args[varname]) + # print(varname, handler) + if handler is None: + continue + do_add(handler, varname, varname) + code = handler.code.substitute({"varname": varname}).replace( + "\n", "\n\t\t\t" + ) + if handler.canfail: + canfail = 1 + strblock = "" + if not handler.drop_str: + do_add( + StringHandler(None), + varname, + "%s_str" % (varname), + " __attribute__ ((unused))", + ) + strblock = "\n\t\t\t%s_str = argv[_i]->arg;" % (varname) + argblocks.append( + argblock.substitute( + {"varname": varname, "strblock": strblock, "code": code} + ) + ) + + if dumpfd is not None: + if len(arglist) > 0: + dumpfd.write('"%s":\n%s\n\n' % (cmddef, "\n".join(doc))) + else: + dumpfd.write('"%s":\n\t---- no magic arguments ----\n\n' % (cmddef)) + + params["cond_begin"] = "".join(cond_stack) + params["cond_end"] = "".join(["#endif\n"] * len(cond_stack)) + params["argdefs"] = "".join(argdefs) + params["argdecls"] = "".join(argdecls) + params["arglist"] = "".join(arglist) + params["argblocks"] = "".join(argblocks) + params["canfail"] = canfail + params["nonempty"] = len(argblocks) + params["argassert"] = "".join(argassert) + ofd.write(templ.substitute(params)) + + return errors + + +if __name__ == "__main__": + import argparse + + argp = argparse.ArgumentParser(description="FRR CLI preprocessor in Python") + argp.add_argument( + "--all-defun", + action="store_const", + const=True, + help="process DEFUN() statements in addition to DEFPY()", + ) + 
argp.add_argument( + "--show", + action="store_const", + const=True, + help="print out list of arguments and types for each definition", + ) + argp.add_argument("-o", type=str, metavar="OUTFILE", help="output C file name") + argp.add_argument("cfile", type=str) + args = argp.parse_args() + + dumpfd = None + if args.o is not None: + ofd = StringIO() + if args.show: + dumpfd = sys.stdout + else: + ofd = sys.stdout + if args.show: + dumpfd = sys.stderr + + basepath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + macros = Macros() + macros.load("lib/route_types.h") + macros.load(os.path.join(basepath, "lib/command.h")) + macros.load(os.path.join(basepath, "bgpd/bgp_vty.h")) + # sigh :( + macros.setup("PROTO_REDIST_STR", "FRR_REDIST_STR_ISISD") + macros.setup("PROTO_IP_REDIST_STR", "FRR_IP_REDIST_STR_ISISD") + macros.setup("PROTO_IP6_REDIST_STR", "FRR_IP6_REDIST_STR_ISISD") + + errors = process_file(args.cfile, ofd, dumpfd, args.all_defun, macros) + if errors != 0: + sys.exit(1) + + if args.o is not None: + clippy.wrdiff( + args.o, ofd, [args.cfile, os.path.realpath(__file__), sys.executable] + ) diff --git a/python/clippy/__init__.py b/python/clippy/__init__.py new file mode 100644 index 0000000..60119fb --- /dev/null +++ b/python/clippy/__init__.py @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# FRR CLI preprocessor +# +# Copyright (C) 2017 David Lamparter for NetDEF, Inc. + +import os, stat + +try: + from enum import IntFlag as _IntFlag +except ImportError: + # python <3.6 + from enum import IntEnum as _IntFlag # type: ignore + +import _clippy +from _clippy import ( + parse, + Graph, + GraphNode, + CMD_ATTR_YANG, + CMD_ATTR_HIDDEN, + CMD_ATTR_DEPRECATED, + CMD_ATTR_NOSH, +) + + +frr_top_src = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +) + + +def graph_iterate(graph): + """iterator yielding all nodes of a graph + + nodes arrive in input/definition order, graph circles are avoided. 
+ """ + + queue = [(graph.first(), frozenset(), 0)] + while len(queue) > 0: + node, stop, depth = queue.pop(0) + yield node, depth + + join = node.join() + if join is not None: + queue.insert(0, (join, stop.union(frozenset([node])), depth)) + join = frozenset([join]) + + stop = join or stop + nnext = node.next() + for n in reversed(nnext): + if n not in stop and n is not node: + queue.insert(0, (n, stop, depth + 1)) + + +def dump(graph): + """print out clippy.Graph""" + + for i, depth in graph_iterate(graph): + print("\t%s%s %r" % (" " * (depth * 2), i.type, i.text)) + + +def wrdiff(filename, buf, reffiles=[]): + """write buffer to file if contents changed""" + + expl = "" + if hasattr(buf, "getvalue"): + buf = buf.getvalue() + old = None + try: + old = open(filename, "r").read() + except: + pass + if old == buf: + for reffile in reffiles: + # ensure output timestamp is newer than inputs, for make + reftime = os.stat(reffile)[stat.ST_MTIME] + outtime = os.stat(filename)[stat.ST_MTIME] + if outtime <= reftime: + os.utime(filename, (reftime + 1, reftime + 1)) + # sys.stderr.write('%s unchanged, not written\n' % (filename)) + return + + newname = "%s.new-%d" % (filename, os.getpid()) + with open(newname, "w") as out: + out.write(buf) + os.rename(newname, filename) + + +class CmdAttr(_IntFlag): + YANG = CMD_ATTR_YANG + HIDDEN = CMD_ATTR_HIDDEN + DEPRECATED = CMD_ATTR_DEPRECATED + NOSH = CMD_ATTR_NOSH diff --git a/python/clippy/elf.py b/python/clippy/elf.py new file mode 100644 index 0000000..cc442ee --- /dev/null +++ b/python/clippy/elf.py @@ -0,0 +1,613 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# FRR libelf wrapper +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. + +""" +Wrapping layer and additional utility around _clippy.ELFFile. + +Essentially, the C bits have the low-level ELF access bits that should be +fast while this has the bits that string everything together (and would've +been a PITA to do in C.) 
+ +Surprisingly - or maybe through proper engineering - this actually works +across architecture, word size and even endianness boundaries. Both the C +module (through GElf_*) and this code (cf. struct.unpack format mangling +in ELFDissectStruct) will take appropriate measures to flip and resize +fields as needed. +""" + +import struct +from collections import OrderedDict +from weakref import WeakValueDictionary + +from _clippy import ELFFile, ELFAccessError + +# +# data access +# + + +class ELFNull(object): + """ + NULL pointer, returned instead of ELFData + """ + + def __init__(self): + self.symname = None + self._dstsect = None + + def __repr__(self): + return "<ptr: NULL>" + + def __hash__(self): + return hash(None) + + def get_string(self): + return None + + +class ELFUnresolved(object): + """ + Reference to an unresolved external symbol, returned instead of ELFData + + :param symname: name of the referenced symbol + :param addend: offset added to the symbol, normally zero + """ + + def __init__(self, symname, addend): + self.addend = addend + self.symname = symname + self._dstsect = None + + def __repr__(self): + return "<unresolved: %s+%d>" % (self.symname, self.addend) + + def __hash__(self): + return hash((self.symname, self.addend)) + + +class ELFData(object): + """ + Actual data somewhere in the ELF file. 
+ + :type dstsect: ELFSubset + :param dstsect: container data area (section or entire file) + :param dstoffs: byte offset into dstsect + :param dstlen: byte size of object, or None if unknown, open-ended or string + """ + + def __init__(self, dstsect, dstoffs, dstlen): + self._dstsect = dstsect + self._dstoffs = dstoffs + self._dstlen = dstlen + self.symname = None + + def __repr__(self): + return "<ptr: %s+0x%05x/%d>" % ( + self._dstsect.name, + self._dstoffs, + self._dstlen or -1, + ) + + def __hash__(self): + return hash((self._dstsect, self._dstoffs)) + + def get_string(self): + """ + Interpret as C string / null terminated UTF-8 and get the actual text. + """ + try: + return self._dstsect[self._dstoffs : str].decode("UTF-8") + except: + import pdb + + pdb.set_trace() + + def get_data(self, reflen): + """ + Interpret as some structure (and check vs. expected length) + + :param reflen: expected size of the object, compared against actual + size (which is only known in rare cases, mostly when directly + accessing a symbol since symbols have their destination object + size recorded) + """ + if self._dstlen is not None and self._dstlen != reflen: + raise ValueError( + "symbol size mismatch (got %d, expected %d)" % (self._dstlen, reflen) + ) + return self._dstsect[self._dstoffs : self._dstoffs + reflen] + + def offset(self, offs, within_symbol=False): + """ + Get another ELFData at an offset + + :param offs: byte offset, can be negative (e.g. in container_of) + :param within_symbol: retain length information + """ + if self._dstlen is None or not within_symbol: + return ELFData(self._dstsect, self._dstoffs + offs, None) + else: + return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs) + + +# +# dissection data items +# + + +class ELFDissectData(object): + """ + Common bits for ELFDissectStruct and ELFDissectUnion + """ + + def __init__(self): + self._data = None + self.elfclass = None + + def __len__(self): + """ + Used for boolean evaluation, e.g. 
"if struct: ..." + """ + return not ( + isinstance(self._data, ELFNull) or isinstance(self._data, ELFUnresolved) + ) + + def container_of(self, parent, fieldname): + """ + Assume this struct is embedded in a larger struct and get at the larger + + Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)`` + + :param parent: class (not instance) of the larger struct + :param fieldname: fieldname that refers back to this + :returns: instance of parent, with fieldname set to this object + """ + offset = 0 + if not hasattr(parent, "_efields"): + parent._setup_efields() + + for field in parent._efields[self.elfclass]: + if field[0] == fieldname: + break + spec = field[1] + if spec == "P": + spec = "I" if self.elfclass == 32 else "Q" + offset += struct.calcsize(spec) + else: + raise AttributeError("%r not found in %r.fields" % (fieldname, parent)) + + return parent(self._data.offset(-offset), replace={fieldname: self}) + + +class ELFDissectStruct(ELFDissectData): + """ + Decode and provide access to a struct somewhere in the ELF file + + Handles pointers and strings somewhat nicely. Create a subclass for each + struct that is to be accessed, and give a field list in a "fields" + class-member. + + :param dataptr: ELFData referring to the data bits to decode. + :param parent: where this was instantiated from; only for reference, has + no functional impact. + :param replace: substitute data values for specific fields. Used by + `container_of` to replace the inner struct when creating the outer + one. + + .. attribute:: fields + + List of tuples describing the struct members. 
Items can be: + - ``('name', ELFDissectData)`` - directly embed another struct + - ``('name', 'I')`` - simple data types; second item for struct.unpack + - ``('name', 'I', None)`` - field to ignore + - ``('name', 'P', str)`` - pointer to string + - ``('name', 'P', ELFDissectData)`` - pointer to another struct + + ``P`` is added as unpack format for pointers (sized appropriately for + the ELF file.) + + Refer to tiabwarfo.py for extracting this from ``pahole``. + + TBD: replace tuples with a class. + + .. attribute:: fieldrename + + Dictionary to rename fields, useful if fields comes from tiabwarfo.py. + """ + + class Pointer(object): + """ + Quick wrapper for pointers to further structs + + This is just here to avoid going into infinite loops when loading + structs that have pointers to each other (e.g. struct xref <--> + struct xrefdata.) The pointer destination is only instantiated when + actually accessed. + """ + + def __init__(self, cls, ptr): + self.cls = cls + self.ptr = ptr + + def __repr__(self): + return "<Pointer:%s %r>" % (self.cls.__name__, self.ptr) + + def __call__(self): + if isinstance(self.ptr, ELFNull): + return None + return self.cls(self.ptr) + + def __new__(cls, dataptr, parent=None, replace=None): + if dataptr._dstsect is None: + return super().__new__(cls) + + obj = dataptr._dstsect._pointers.get((cls, dataptr)) + if obj is not None: + return obj + obj = super().__new__(cls) + dataptr._dstsect._pointers[(cls, dataptr)] = obj + return obj + + replacements = "lLnN" + + @classmethod + def _preproc_structspec(cls, elfclass, spec): + elfbits = elfclass + + if hasattr(spec, "calcsize"): + spec = "%ds" % (spec.calcsize(elfclass),) + + if elfbits == 32: + repl = ["i", "I"] + else: + repl = ["q", "Q"] + for c in cls.replacements: + spec = spec.replace(c, repl[int(c.isupper())]) + return spec + + @classmethod + def _setup_efields(cls): + cls._efields = {} + cls._esize = {} + for elfclass in [32, 64]: + cls._efields[elfclass] = [] + size = 0 + for f in 
cls.fields: + newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:] + cls._efields[elfclass].append(newf) + size += struct.calcsize(newf[1]) + cls._esize[elfclass] = size + + def __init__(self, dataptr, parent=None, replace=None): + if not hasattr(self.__class__, "_efields"): + self._setup_efields() + + self._fdata = None + self._data = dataptr + self._parent = parent + self.symname = dataptr.symname + if isinstance(dataptr, ELFNull) or isinstance(dataptr, ELFUnresolved): + self._fdata = {} + return + + self._elfsect = dataptr._dstsect + self.elfclass = self._elfsect._elffile.elfclass + self.offset = dataptr._dstoffs + + pspecl = [f[1] for f in self._efields[self.elfclass]] + + # need to correlate output from struct.unpack with extra metadata + # about the particular fields, so note down byte offsets (in locs) + # and tuple indices of pointers (in ptrs) + pspec = "" + locs = {} + ptrs = set() + + for idx, spec in enumerate(pspecl): + if spec == "P": + ptrs.add(idx) + spec = self._elfsect.ptrtype + + locs[idx] = struct.calcsize(pspec) + pspec = pspec + spec + + self._total_size = struct.calcsize(pspec) + + def replace_ptrs(v): + idx, val = v[0], v[1] + if idx not in ptrs: + return val + return self._elfsect.pointer(self.offset + locs[idx]) + + data = dataptr.get_data(struct.calcsize(pspec)) + unpacked = struct.unpack(self._elfsect.endian + pspec, data) + unpacked = list(map(replace_ptrs, enumerate(unpacked))) + self._fraw = unpacked + self._fdata = OrderedDict() + replace = replace or {} + + for i, item in enumerate(unpacked): + name = self.fields[i][0] + if name is None: + continue + + if name in replace: + self._fdata[name] = replace[name] + continue + + if isinstance(self.fields[i][1], type) and issubclass( + self.fields[i][1], ELFDissectData + ): + dataobj = self.fields[i][1](dataptr.offset(locs[i]), self) + self._fdata[name] = dataobj + continue + if len(self.fields[i]) == 3: + if self.fields[i][2] == str: + self._fdata[name] = item.get_string() + 
continue + elif self.fields[i][2] is None: + pass + elif issubclass(self.fields[i][2], ELFDissectData): + cls = self.fields[i][2] + dataobj = self.Pointer(cls, item) + self._fdata[name] = dataobj + continue + + self._fdata[name] = item + + def __getattr__(self, attrname): + if attrname not in self._fdata: + raise AttributeError(attrname) + if isinstance(self._fdata[attrname], self.Pointer): + self._fdata[attrname] = self._fdata[attrname]() + return self._fdata[attrname] + + def __repr__(self): + if not isinstance(self._data, ELFData): + return "<%s: %r>" % (self.__class__.__name__, self._data) + return "<%s: %s>" % ( + self.__class__.__name__, + ", ".join(["%s=%r" % t for t in self._fdata.items()]), + ) + + @classmethod + def calcsize(cls, elfclass): + """ + Sum up byte size of this struct + + Wraps struct.calcsize with some extra features. + """ + if not hasattr(cls, "_efields"): + cls._setup_efields() + + pspec = "".join([f[1] for f in cls._efields[elfclass]]) + + ptrtype = "I" if elfclass == 32 else "Q" + pspec = pspec.replace("P", ptrtype) + + return struct.calcsize(pspec) + + +class ELFDissectUnion(ELFDissectData): + """ + Decode multiple structs in the same place. + + Not currently used (and hence not tested.) Worked at some point but not + needed anymore and may be borked now. Remove this comment when using. 
+ """ + + members = {} + + def __init__(self, dataptr, parent=None): + self._dataptr = dataptr + self._parent = parent + self.members = [] + for name, membercls in self.__class__.members: + item = membercls(dataptr, parent) + self.members.append(item) + setattr(self, name, item) + + def __repr__(self): + return "<%s: %s>" % ( + self.__class__.__name__, + ", ".join([repr(i) for i in self.members]), + ) + + @classmethod + def calcsize(cls, elfclass): + return max([member.calcsize(elfclass) for name, member in cls.members]) + + +# +# wrappers for spans of ELF data +# + + +class ELFSubset(object): + """ + Common abstract base for section-level and file-level access. + """ + + def __init__(self): + super().__init__() + + self.name = None + self._obj = None + self._elffile = None + self.ptrtype = None + self.endian = None + self._pointers = WeakValueDictionary() + + def _wrap_data(self, data, dstsect): + raise NotImplementedError() + + def __hash__(self): + return hash(self.name) + + def __getitem__(self, k): + """ + Read data from slice + + Subscript **must** be a slice; a simple index will not return a byte + but rather throw an exception. Valid slice syntaxes are defined by + the C module: + + - `this[123:456]` - extract specific range + - `this[123:str]` - extract until null byte. The slice stop value is + the `str` type (or, technically, `unicode`.) + """ + return self._obj[k] + + def getreloc(self, offset): + """ + Check for a relocation record at the specified offset. 
+ """ + return self._obj.getreloc(offset) + + def iter_data(self, scls, slice_=slice(None)): + """ + Assume an array of structs present at a particular slice and decode + + :param scls: ELFDissectData subclass for the struct + :param slice_: optional range specification + """ + size = scls.calcsize(self._elffile.elfclass) + + offset = slice_.start or 0 + stop = slice_.stop or self._obj.len + if stop < 0: + stop = self._obj.len - stop + + while offset < stop: + yield scls(ELFData(self, offset, size)) + offset += size + + def pointer(self, offset): + """ + Try to dereference a pointer value + + This checks whether there's a relocation at the given offset and + uses that; otherwise (e.g. in a non-PIE executable where the pointer + is already resolved by the linker) the data at the location is used. + + :param offset: byte offset from beginning of section, + or virtual address in file + :returns: ELFData wrapping pointed-to object + """ + + ptrsize = struct.calcsize(self.ptrtype) + data = struct.unpack( + self.endian + self.ptrtype, self[offset : offset + ptrsize] + )[0] + + reloc = self.getreloc(offset) + dstsect = None + if reloc: + # section won't be available in whole-file operation + dstsect = reloc.getsection(data) + addend = reloc.r_addend + + if reloc.relative: + # old-style ELF REL instead of RELA, not well-tested + addend += data + + if reloc.unresolved and reloc.symvalid: + return ELFUnresolved(reloc.symname, addend) + elif reloc.symvalid: + data = addend + reloc.st_value + else: + data = addend + + # 0 could technically be a valid pointer for a shared library, + # since libraries may use 0 as default virtual start address (it'll + # be adjusted on loading) + # That said, if the library starts at 0, that's where the ELF header + # would be so it's still an invalid pointer. 
+ if data == 0 and dstsect == None: + return ELFNull() + + # wrap_data is different between file & section + return self._wrap_data(data, dstsect) + + +class ELFDissectSection(ELFSubset): + """ + Access the contents of an ELF section like ``.text`` or ``.data`` + + :param elfwrap: ELFDissectFile wrapper for the file + :param idx: section index in section header table + :param section: section object from C module + """ + + def __init__(self, elfwrap, idx, section): + super().__init__() + + self._elfwrap = elfwrap + self._elffile = elfwrap._elffile + self._idx = idx + self._section = self._obj = section + self.name = section.name + self.ptrtype = elfwrap.ptrtype + self.endian = elfwrap.endian + + def _wrap_data(self, data, dstsect): + if dstsect is None: + dstsect = self._elfwrap._elffile.get_section_addr(data) + offs = data - dstsect.sh_addr + dstsect = self._elfwrap.get_section(dstsect.idx) + return ELFData(dstsect, offs, None) + + +class ELFDissectFile(ELFSubset): + """ + Access the contents of an ELF file. + + Note that offsets for array subscript and relocation/pointer access are + based on the file's virtual address space and are NOT offsets to the + start of the file on disk! + + (Shared libraries frequently have a virtual address space starting at 0, + but non-PIE executables have an architecture specific default loading + address like 0x400000 on x86. 
+ + :param filename: ELF file to open + """ + + def __init__(self, filename): + super().__init__() + + self.name = filename + self._elffile = self._obj = ELFFile(filename) + self._sections = {} + + self.ptrtype = "I" if self._elffile.elfclass == 32 else "Q" + self.endian = ">" if self._elffile.bigendian else "<" + + @property + def _elfwrap(self): + return self + + def _wrap_data(self, data, dstsect): + return ELFData(self, data, None) + + def get_section(self, secname): + """ + Look up section by name or index + """ + if isinstance(secname, int): + sh_idx = secname + section = self._elffile.get_section_idx(secname) + else: + section = self._elffile.get_section(secname) + + if section is None: + return None + + sh_idx = section.idx + + if sh_idx not in self._sections: + self._sections[sh_idx] = ELFDissectSection(self, sh_idx, section) + + return self._sections[sh_idx] diff --git a/python/clippy/uidhash.py b/python/clippy/uidhash.py new file mode 100644 index 0000000..73570b2 --- /dev/null +++ b/python/clippy/uidhash.py @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# xref unique ID hash calculation +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. + +import struct +from hashlib import sha256 + + +def bititer(data, bits, startbit=True): + """ + just iterate the individual bits out from a bytes object + + if startbit is True, an '1' bit is inserted at the very beginning + goes <bits> at a time, starts at LSB. 
+ """ + bitavail, v = 0, 0 + if startbit and len(data) > 0: + v = data.pop(0) + yield (v & ((1 << bits) - 1)) | (1 << (bits - 1)) + bitavail = 9 - bits + v >>= bits - 1 + + while len(data) > 0: + while bitavail < bits: + v |= data.pop(0) << bitavail + bitavail += 8 + yield v & ((1 << bits) - 1) + bitavail -= bits + v >>= bits + + +def base32c(data): + """ + Crockford base32 with extra dashes + """ + chs = "0123456789ABCDEFGHJKMNPQRSTVWXYZ" + o = "" + if type(data) == str: + data = [ord(v) for v in data] + else: + data = list(data) + for i, bits in enumerate(bititer(data, 5)): + if i == 5: + o = o + "-" + elif i == 10: + break + o = o + chs[bits] + return o + + +def uidhash(filename, hashstr, hashu32a, hashu32b): + """ + xref Unique ID hash used in FRRouting + """ + filename = "/".join(filename.rsplit("/")[-2:]) + + hdata = filename.encode("UTF-8") + hashstr.encode("UTF-8") + hdata += struct.pack(">II", hashu32a, hashu32b) + i = sha256(hdata).digest() + return base32c(i) diff --git a/python/firstheader.py b/python/firstheader.py new file mode 100644 index 0000000..06e2895 --- /dev/null +++ b/python/firstheader.py @@ -0,0 +1,77 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# check that the first header included in C files is either +# zebra.h or config.h +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. 
+ +import sys +import os +import re +import subprocess +import argparse + +argp = argparse.ArgumentParser(description="include fixer") +argp.add_argument("--autofix", action="store_const", const=True) +argp.add_argument("--warn-empty", action="store_const", const=True) +argp.add_argument("--pipe", action="store_const", const=True) + +include_re = re.compile('^#\s*include\s+["<]([^ ">]+)[">]', re.M) + +ignore = [ + lambda fn: fn.startswith("tools/"), + lambda fn: fn + in [ + "lib/elf_py.c", + ], +] + + +def run(args): + out = [] + + files = subprocess.check_output(["git", "ls-files"]).decode("ASCII") + for fn in files.splitlines(): + if not fn.endswith(".c"): + continue + if max([i(fn) for i in ignore]): + continue + + with open(fn, "r") as fd: + data = fd.read() + + m = include_re.search(data) + if m is None: + if args.warn_empty: + sys.stderr.write("no #include in %s?\n" % (fn)) + continue + if m.group(1) in ["config.h", "zebra.h", "lib/zebra.h"]: + continue + + if args.autofix: + sys.stderr.write("%s: %s - fixing\n" % (fn, m.group(0))) + if fn.startswith("pceplib/"): + insert = '#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n' + else: + insert = "#include <zebra.h>\n\n" + + pos = m.span()[0] + + data = data[:pos] + insert + data[pos:] + with open(fn + ".new", "w") as fd: + fd.write(data) + os.rename(fn + ".new", fn) + else: + sys.stderr.write("%s: %s\n" % (fn, m.group(0))) + out.append(fn) + + if len(out): + if args.pipe: + # for "vim `firstheader.py`" + print("\n".join(out)) + return 1 + return 0 + + +if __name__ == "__main__": + args = argp.parse_args() + sys.exit(run(args)) diff --git a/python/makefile.py b/python/makefile.py new file mode 100644 index 0000000..573871f --- /dev/null +++ b/python/makefile.py @@ -0,0 +1,217 @@ +#!/usr/bin/python3 +# +# FRR extended automake/Makefile functionality helper +# +# This script is executed on/after generating Makefile to add some pieces for +# clippy. 
import sys
import os
import subprocess
import re
import argparse
from string import Template
from makevars import MakeReVars

# Command-line handling: the only option enables the extra consistency check
# further down.
argp = argparse.ArgumentParser(description="FRR Makefile extensions")
argp.add_argument(
    "--dev-build",
    action="store_const",
    const=True,
    help="run additional developer checks",
)
args = argp.parse_args()

# The Makefile in the current working directory is read in full; "before" is
# kept so that the rewritten text can be compared against it at the end.
with open("Makefile", "r") as fd:
    before = fd.read()

# Regex-based variable lookup on the Makefile text (does not invoke make).
mv = MakeReVars(before)

# Whitespace-separated list of C sources that use clippy-generated input.
clippy_scan = mv["clippy_scan"].strip().split()
for clippy_file in clippy_scan:
    # Later code strips the two-character ".c" suffix by slicing, so anything
    # else in this list would silently produce broken rules.  Checked
    # explicitly (rather than with "assert") so it also runs under "python -O".
    if not clippy_file.endswith(".c"):
        sys.stderr.write(
            'error: "clippy_scan" entry is not a .c file: %s\n' % (clippy_file,)
        )
        sys.exit(1)

# Programs and libraries for which .xref files are collected.
xref_targets = []
for varname in [
    "bin_PROGRAMS",
    "sbin_PROGRAMS",
    "lib_LTLIBRARIES",
    "module_LTLIBRARIES",
]:
    xref_targets.extend(mv[varname].strip().split())

# check for files using clippy but not listed in clippy_scan
if args.dev_build:
    # the source tree root is the parent of the directory holding this script
    basepath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # only possible when building from a git checkout
    if os.path.exists(os.path.join(basepath, ".git")):
        clippy_ref = subprocess.check_output(
            [
                "git",
                "-C",
                basepath,
                "grep",
                "-l",
                "-P",
                # raw string: "\s" must reach git grep verbatim, and is not a
                # valid Python escape sequence in a normal string literal
                r"^#\s*include.*_clippy.c",
                "--",
                "**.c",
            ]
        ).decode("US-ASCII")

        clippy_ref = set(clippy_ref.splitlines())
        missing = clippy_ref - set(clippy_scan)

        if len(missing) > 0:
            sys.stderr.write(
                'error: files seem to be using clippy, but not listed in "clippy_scan" in subdir.am:\n\t%s\n'
                % ("\n\t".join(sorted(missing)))
            )
            sys.exit(1)

# this additional-dependency rule is stuck onto all compile targets that
# compile a file which uses clippy-generated input, so it has a dependency to
# make that first.
+clippydep = Template( + """ +${clippybase}.$$(OBJEXT): ${clippybase}_clippy.c +${clippybase}.lo: ${clippybase}_clippy.c +${clippybase}_clippy.c: $$(CLIPPY_DEPS)""" +) + +# this one is used when one .c file is built multiple times with different +# CFLAGS +clippyauxdep = Template( + """# clippy{ +# auxiliary clippy target +${target}: ${clippybase}_clippy.c +# }clippy""" +) + +lines = before.splitlines() +autoderp = "#AUTODERP# " +out_lines = [] +bcdeps = [] +make_rule_re = re.compile("^([^:\s]+):\s*([^:\s]+)\s*($|\n)") + +while lines: + line = lines.pop(0) + if line.startswith(autoderp): + line = line[len(autoderp) :] + + # allow rerunning on already-clippified Makefile + if line == "# clippy{": + while lines: + line = lines.pop(0) + if line == "# }clippy": + break + continue + + if line.startswith("#"): + out_lines.append(line) + continue + + full_line = line + full_lines = lines[:] + while full_line.endswith("\\"): + full_line = full_line[:-1] + full_lines.pop(0) + + m = make_rule_re.match(full_line) + if m is None: + out_lines.append(line) + continue + + line, lines = full_line, full_lines + + target, dep = m.group(1), m.group(2) + + filename = os.path.basename(target) + if "-" in filename: + # dashes in output filename = building same .c with different CFLAGS + am_name, _ = filename.split("-", 1) + am_name = os.path.join(os.path.dirname(target), am_name) + am_name = am_name.replace("/", "_") + extraflags = " $(%s_CFLAGS)" % (am_name,) + else: + # this path isn't really triggered because automake is using a generic + # .c => .o rule unless CFLAGS are customized for a target + extraflags = "" + + if target.endswith(".lo") or target.endswith(".o"): + if not dep.endswith(".h"): + # LLVM bitcode targets for analysis tools + bcdeps.append("%s.bc: %s" % (target, target)) + bcdeps.append( + "\t$(AM_V_LLVM_BC)$(COMPILE)%s -emit-llvm -c -o $@ %s" + % (extraflags, dep) + ) + if m.group(2) in clippy_scan: + # again - this is only hit for targets with custom CFLAGS, because 
+ # automake uses a generic .c -> .o rule for standard CFLAGS + out_lines.append( + clippyauxdep.substitute(target=m.group(1), clippybase=m.group(2)[:-2]) + ) + + out_lines.append(line) + +# now, cover all the .c files that don't have special build rules +out_lines.append("# clippy{\n# main clippy targets") +for clippy_file in clippy_scan: + out_lines.append(clippydep.substitute(clippybase=clippy_file[:-2])) + +# combine daemon .xref files into frr.xref +out_lines.append("") +xref_targets = [ + target + for target in xref_targets + if target + not in [ + "bgpd/rfp-example/rfptest/rfptest", + "pimd/mtracebis", + "tools/ssd", + "vtysh/vtysh", + ] +] +out_lines.append( + "xrefs = %s" % (" ".join(["%s.xref" % target for target in xref_targets])) +) +out_lines.append("frr.xref: $(xrefs)") +out_lines.append("") + +# analog but slower way to get the same frr.xref +# frr.xref: $(bin_PROGRAMS) $(sbin_PROGRAMS) $(lib_LTLIBRARIES) $(module_LTLIBRARIES) +# $(AM_V_XRELFO) $(CLIPPY) $(top_srcdir)/python/xrelfo.py -o $@ $^ + +# LLVM bitcode link targets creating a .bc file for whole daemon or lib +out_lines.append("") +out_lines.extend(bcdeps) +out_lines.append("") +bc_targets = [] +for varname in [ + "bin_PROGRAMS", + "sbin_PROGRAMS", + "lib_LTLIBRARIES", + "module_LTLIBRARIES", + "noinst_LIBRARIES", +]: + bc_targets.extend(mv[varname].strip().split()) +for target in bc_targets: + amtgt = target.replace("/", "_").replace(".", "_").replace("-", "_") + objs = mv[amtgt + "_OBJECTS"].strip().split() + objs = [obj + ".bc" for obj in objs] + deps = mv.get(amtgt + "_DEPENDENCIES", "").strip().split() + deps = [d + ".bc" for d in deps if d.endswith(".a")] + objs.extend(deps) + out_lines.append("%s.bc: %s" % (target, " ".join(objs))) + out_lines.append("\t$(AM_V_LLVM_LD)$(LLVM_LINK) -o $@ $^") + out_lines.append("") + +out_lines.append("# }clippy") +out_lines.append("") + +after = "\n".join(out_lines) +if after == before: + sys.exit(0) + +with open("Makefile.pyout", "w") as fd: + 
fd.write(after) +os.rename("Makefile.pyout", "Makefile") diff --git a/python/makevars.py b/python/makevars.py new file mode 100644 index 0000000..951cd34 --- /dev/null +++ b/python/makevars.py @@ -0,0 +1,100 @@ +# +# helper class to grab variables from FRR's Makefile +# + +import os +import subprocess +import re + + +class MakeVarsBase(object): + """ + common code between MakeVars and MakeReVars + """ + + def __init__(self): + self._data = dict() + + def __getitem__(self, k): + if k not in self._data: + self.getvars([k]) + return self._data[k] + + def get(self, k, defval=None): + if k not in self._data: + self.getvars([k]) + return self._data.get(k) or defval + + +class MakeVars(MakeVarsBase): + """ + makevars['FOO_CFLAGS'] gets you "FOO_CFLAGS" from Makefile + + This variant works by invoking make as a subprocess, i.e. Makefile must + be valid and working. (This is sometimes a problem if depfiles have not + been generated.) + """ + + def getvars(self, varlist): + """ + get a batch list of variables from make. faster than individual calls. + """ + rdfd, wrfd = os.pipe() + + shvars = ["shvar-%s" % s for s in varlist] + make = subprocess.Popen( + ["make", "-s", "VARFD=%d" % wrfd] + shvars, pass_fds=[wrfd] + ) + os.close(wrfd) + data = b"" + + rdf = os.fdopen(rdfd, "rb") + while True: + rdata = rdf.read() + if len(rdata) == 0: + break + data += rdata + + del rdf + make.wait() + + data = data.decode("US-ASCII").strip().split("\n") + for row in data: + k, v = row.split("=", 1) + v = v[1:-1] + self._data[k] = v + + +class MakeReVars(MakeVarsBase): + """ + makevars['FOO_CFLAGS'] gets you "FOO_CFLAGS" from Makefile + + This variant works by regexing through Makefile. This means the Makefile + does not need to be fully working, but on the other hand it doesn't support + fancy complicated make expressions. 
+ """ + + var_re = re.compile( + r"^([^=#\n\s]+)[ \t]*=[ \t]*([^#\n]*)(?:#.*)?$", flags=re.MULTILINE + ) + repl_re = re.compile(r"\$(?:([A-Za-z])|\(([^\)]+)\))") + + def __init__(self, maketext): + super(MakeReVars, self).__init__() + self._vars = dict(self.var_re.findall(maketext.replace("\\\n", ""))) + + def replacevar(self, match): + varname = match.group(1) or match.group(2) + return self._vars.get(varname, "") + + def getvars(self, varlist): + for varname in varlist: + if varname not in self._vars: + continue + + val, prevval = self._vars[varname], None + while val != prevval: + prevval = val + val = self.repl_re.sub(self.replacevar, val) + + self._data[varname] = val diff --git a/python/runtests.py b/python/runtests.py new file mode 100644 index 0000000..70deaa3 --- /dev/null +++ b/python/runtests.py @@ -0,0 +1,16 @@ +import pytest +import sys +import os + +try: + import _clippy +except ImportError: + sys.stderr.write( + """these tests need to be run with the _clippy C extension +module available. Try running "clippy runtests.py ...". +""" + ) + sys.exit(1) + +os.chdir(os.path.dirname(os.path.abspath(__file__))) +raise SystemExit(pytest.main(sys.argv[1:])) diff --git a/python/test_xrelfo.py b/python/test_xrelfo.py new file mode 100644 index 0000000..c851bb0 --- /dev/null +++ b/python/test_xrelfo.py @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# some basic tests for xrelfo & the python ELF machinery +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. 
+ +import sys +import os +import pytest +from pprint import pprint + +root = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(os.path.join(root, "python")) + +import xrelfo +from clippy import elf, uidhash + + +def test_uidhash(): + assert uidhash.uidhash("lib/test_xref.c", "logging call", 3, 0) == "H7KJB-67TBH" + + +def test_xrelfo_other(): + for data in [ + elf.ELFNull(), + elf.ELFUnresolved("somesym", 0), + ]: + + dissect = xrelfo.XrefPtr(data) + print(repr(dissect)) + + with pytest.raises(AttributeError): + dissect.xref + + +def test_xrelfo_obj(): + xrelfo_ = xrelfo.Xrelfo() + edf = xrelfo_.load_elf(os.path.join(root, "lib/.libs/zclient.o"), "zclient.lo") + xrefs = xrelfo_._xrefs + + with pytest.raises(elf.ELFAccessError): + edf[0:4] + + pprint(xrefs[0]) + pprint(xrefs[0]._data) + + +def test_xrelfo_bin(): + xrelfo_ = xrelfo.Xrelfo() + edf = xrelfo_.load_elf(os.path.join(root, "lib/.libs/libfrr.so"), "libfrr.la") + xrefs = xrelfo_._xrefs + + assert edf[0:4] == b"\x7fELF" + + pprint(xrefs[0]) + pprint(xrefs[0]._data) diff --git a/python/tiabwarfo.py b/python/tiabwarfo.py new file mode 100644 index 0000000..da20801 --- /dev/null +++ b/python/tiabwarfo.py @@ -0,0 +1,225 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# FRR DWARF structure definition extractor +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. + +import sys +import os +import subprocess +import re +import argparse +import json + +structs = [ + "xref", + "xref_logmsg", + "xref_threadsched", + "xref_install_element", + "xrefdata", + "xrefdata_logmsg", + "cmd_element", +] + + +def extract(filename="lib/.libs/libfrr.so"): + """ + Convert output from "pahole" to JSON. 
+ + Example pahole output: + $ pahole -C xref lib/.libs/libfrr.so + struct xref { + struct xrefdata * xrefdata; /* 0 8 */ + enum xref_type type; /* 8 4 */ + int line; /* 12 4 */ + const char * file; /* 16 8 */ + const char * func; /* 24 8 */ + + /* size: 32, cachelines: 1, members: 5 */ + /* last cacheline: 32 bytes */ + }; + """ + pahole = subprocess.check_output( + ["pahole", "-C", ",".join(structs), filename] + ).decode("UTF-8") + + struct_re = re.compile(r"^struct ([^ ]+) \{([^\}]+)};", flags=re.M | re.S) + field_re = re.compile( + r"^\s*(?P<type>[^;\(]+)\s+(?P<name>[^;\[\]]+)(?:\[(?P<array>\d+)\])?;\s*\/\*(?P<comment>.*)\*\/\s*$" + ) + comment_re = re.compile(r"^\s*\/\*.*\*\/\s*$") + + pastructs = struct_re.findall(pahole) + out = {} + + for sname, data in pastructs: + this = out.setdefault(sname, {}) + fields = this.setdefault("fields", []) + + lines = data.strip().splitlines() + + next_offs = 0 + + for line in lines: + if line.strip() == "": + continue + m = comment_re.match(line) + if m is not None: + continue + + m = field_re.match(line) + if m is not None: + offs, size = m.group("comment").strip().split() + offs = int(offs) + size = int(size) + typ_ = m.group("type").strip() + name = m.group("name") + + if name.startswith("(*"): + # function pointer + typ_ = typ_ + " *" + name = name[2:].split(")")[0] + + data = { + "name": name, + "type": typ_, + # 'offset': offs, + # 'size': size, + } + if m.group("array"): + data["array"] = int(m.group("array")) + + fields.append(data) + if offs != next_offs: + raise ValueError( + "%d padding bytes before struct %s.%s" + % (offs - next_offs, sname, name) + ) + next_offs = offs + size + continue + + raise ValueError("cannot process line: %s" % line) + + return out + + +class FieldApplicator(object): + """ + Fill ELFDissectStruct fields list from pahole/JSON + + Uses the JSON file created by the above code to fill in the struct fields + in subclasses of ELFDissectStruct. + """ + + # only what we really need. 
add more as needed. + packtypes = { + "int": "i", + "uint8_t": "B", + "uint16_t": "H", + "uint32_t": "I", + "char": "s", + } + + def __init__(self, data): + self.data = data + self.classes = [] + self.clsmap = {} + + def add(self, cls): + self.classes.append(cls) + self.clsmap[cls.struct] = cls + + def resolve(self, cls): + out = [] + # offset = 0 + + fieldrename = getattr(cls, "fieldrename", {}) + + def mkname(n): + return (fieldrename.get(n, n),) + + for field in self.data[cls.struct]["fields"]: + typs = field["type"].split() + typs = [i for i in typs if i not in ["const"]] + + # this will break reuse of xrefstructs.json across 32bit & 64bit + # platforms + + # if field['offset'] != offset: + # assert offset < field['offset'] + # out.append(('_pad', '%ds' % (field['offset'] - offset,))) + + # pretty hacky C types handling, but covers what we need + + ptrlevel = 0 + while typs[-1] == "*": + typs.pop(-1) + ptrlevel += 1 + + if ptrlevel > 0: + packtype = ("P", None) + if ptrlevel == 1: + if typs[0] == "char": + packtype = ("P", str) + elif typs[0] == "struct" and typs[1] in self.clsmap: + packtype = ("P", self.clsmap[typs[1]]) + elif typs[0] == "enum": + packtype = ("I",) + elif typs[0] in self.packtypes: + packtype = (self.packtypes[typs[0]],) + elif typs[0] == "struct": + if typs[1] in self.clsmap: + packtype = (self.clsmap[typs[1]],) + else: + raise ValueError( + "embedded struct %s not in extracted data" % (typs[1],) + ) + else: + raise ValueError( + "cannot decode field %s in struct %s (%s)" + % (cls.struct, field["name"], field["type"]) + ) + + if "array" in field and typs[0] == "char": + packtype = ("%ds" % field["array"],) + out.append(mkname(field["name"]) + packtype) + elif "array" in field: + for i in range(0, field["array"]): + out.append(mkname("%s_%d" % (field["name"], i)) + packtype) + else: + out.append(mkname(field["name"]) + packtype) + + # offset = field['offset'] + field['size'] + + cls.fields = out + + def __call__(self): + for cls in 
self.classes: + self.resolve(cls) + + +def main(): + argp = argparse.ArgumentParser(description="FRR DWARF structure extractor") + argp.add_argument( + "-o", + dest="output", + type=str, + help="write JSON output", + default="python/xrefstructs.json", + ) + argp.add_argument( + "-i", + dest="input", + type=str, + help="ELF file to read", + default="lib/.libs/libfrr.so", + ) + args = argp.parse_args() + + out = extract(args.input) + with open(args.output + ".tmp", "w") as fd: + json.dump(out, fd, indent=2, sort_keys=True) + os.rename(args.output + ".tmp", args.output) + + +if __name__ == "__main__": + main() diff --git a/python/xref2vtysh.py b/python/xref2vtysh.py new file mode 100644 index 0000000..0a7e28e --- /dev/null +++ b/python/xref2vtysh.py @@ -0,0 +1,388 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# FRR xref vtysh command extraction +# +# Copyright (C) 2022 David Lamparter for NetDEF, Inc. + +""" +Generate vtysh_cmd.c from frr .xref file(s). + +This can run either standalone or as part of xrelfo. The latter saves a +non-negligible amount of time (0.5s on average systems, more on e.g. slow ARMs) +since serializing and deserializing JSON is a significant bottleneck in this. +""" + +import sys +import os +import re +import pathlib +import argparse +from collections import defaultdict +import difflib + +import json + +try: + import ujson as json # type: ignore +except ImportError: + pass + +frr_top_src = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +# vtysh needs to know which daemon(s) to send commands to. For lib/, this is +# not quite obvious... 

# Source file under lib/ -> "|"-joined VTYSH_* daemon flags for commands
# defined in that file.
daemon_flags = {
    "lib/agentx.c": "VTYSH_ISISD|VTYSH_RIPD|VTYSH_OSPFD|VTYSH_OSPF6D|VTYSH_BGPD|VTYSH_ZEBRA",
    "lib/filter.c": "VTYSH_ACL",
    "lib/filter_cli.c": "VTYSH_ACL",
    "lib/if.c": "VTYSH_INTERFACE",
    "lib/keychain.c": "VTYSH_KEYS",
    "lib/mgmt_be_client.c": "VTYSH_STATICD",
    "lib/mgmt_fe_client.c": "VTYSH_MGMTD",
    "lib/lib_vty.c": "VTYSH_ALL",
    "lib/log_vty.c": "VTYSH_ALL",
    "lib/nexthop_group.c": "VTYSH_NH_GROUP",
    "lib/resolver.c": "VTYSH_NHRPD|VTYSH_BGPD",
    "lib/routemap.c": "VTYSH_RMAP",
    "lib/routemap_cli.c": "VTYSH_RMAP",
    "lib/spf_backoff.c": "VTYSH_ISISD",
    "lib/event.c": "VTYSH_ALL",
    "lib/vrf.c": "VTYSH_VRF",
    "lib/vty.c": "VTYSH_ALL",
}

# Fixed preamble written at the top of the generated C file.
vtysh_cmd_head = """/* autogenerated file, DO NOT EDIT! */
#include <zebra.h>

#include "command.h"
#include "linklist.h"

#include "vtysh/vtysh.h"
"""

# Only colorize the help-string diff when stderr is an interactive terminal.
if sys.stderr.isatty():
    _fmt_red = "\033[31m"
    _fmt_green = "\033[32m"
    _fmt_clear = "\033[m"
else:
    _fmt_red = _fmt_green = _fmt_clear = ""


def c_escape(text: str) -> str:
    """
    Escape string for output into C source code.

    Handles only what's needed here (backslash, double quote, newline).  CLI
    strings and help text don't contain weird special characters.
    """
    return text.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")


class NodeDict(defaultdict):
    """
    CLI node ID (integer) -> dict of commands in that node.

    Missing node IDs are created on first access with an empty dict.
    """

    nodenames = {}  # Dict[int, str]

    def __init__(self):
        super().__init__(dict)

    def items_named(self):
        """Yield (node name, commands dict) for every node in this mapping."""
        for nodeid, commands in self.items():
            yield self.nodename(nodeid), commands

    @classmethod
    def nodename(cls, nodeid: int) -> str:
        """Return the name loaded for nodeid, or the ID as a string if unknown."""
        return cls.nodenames.get(nodeid, str(nodeid))

    @classmethod
    def load_nodenames(cls):
        """
        Fill nodenames from the "enum node_type" block in lib/command.h.

        Names are numbered by their position in the enum body after comments
        are stripped.  Raises RuntimeError if the enum cannot be found.
        """
        with open(os.path.join(frr_top_src, "lib", "command.h"), "r") as fd:
            command_h = fd.read()

        nodes = re.search(r"enum\s+node_type\s+\{(.*?)\}", command_h, re.S)
        if nodes is None:
            raise RuntimeError(
                "regex failed to match on lib/command.h (to get CLI node names)"
            )

        text = nodes.group(1)
        text = re.sub(r"/\*.*?\*/", "", text, flags=re.S)
        text = re.sub(r"//.*?$", "", text, flags=re.M)
        text = text.replace(",", " ")

        for i, name in enumerate(text.split()):
            cls.nodenames[i] = name


class CommandEntry:
    """
    CLI command definition.

    - one DEFUN creates at most one of these, even if the same command is
      installed in multiple CLI nodes (e.g. BGP address-family nodes)
    - for each CLI node, commands with the same CLI string are merged.  This
      is *almost* irrelevant - ospfd & ospf6d define some identical commands
      in the route-map node.  Those must be merged for things to work
      correctly.
    """

    all_defs = []  # List[CommandEntry]
    warn_counter = 0

    def __init__(self, origin, name, spec):
        """
        origin: binary/module the command came from (xref "cli" sub-key)
        name:   DEFUN name
        spec:   xref dict for the command; "string", "doc" and "defun" are
                read here, "attrs" is optional
        """
        self.origin = origin
        self.name = name
        self._spec = spec
        self._registered = False

        self.cmd = spec["string"]
        self._cmd_normalized = self.normalize_cmd(self.cmd)

        self.hidden = "hidden" in spec.get("attrs", [])
        self.daemons = self._get_daemons()

        # "".splitlines() is [], so an empty (or null) docstring must not be
        # indexed; it is reported the same way as a missing trailing newline.
        self.doclines = (self._spec["doc"] or "").splitlines(keepends=True)
        if not self.doclines or not self.doclines[-1].endswith("\n"):
            self.warn_loc("docstring does not end with \\n")

    def warn_loc(self, wtext, nodename=None):
        """
        Print warning with parseable (compiler style) location

        Matching the way compilers emit file/lineno means editors/IDE can
        identify / jump to the error location.  Each call bumps the
        class-wide warn_counter once, regardless of how many lines are
        printed.
        """

        if nodename:
            prefix = ": [%s] %s:" % (nodename, self.name)
        else:
            prefix = ": %s:" % (self.name,)

        for line in wtext.rstrip("\n").split("\n"):
            sys.stderr.write(
                "%s:%d%s %s\n"
                % (
                    self._spec["defun"]["file"],
                    self._spec["defun"]["line"],
                    prefix,
                    line,
                )
            )
            # continuation lines get a short marker instead of the name
            prefix = "- "

        CommandEntry.warn_counter += 1

    def _get_daemons(self):
        """
        Return the set of VTYSH_* flag strings this command is sent to.

        Elements may themselves be "|"-joined lists; get_def() splits them.
        An empty set means "not a vtysh command" (process() drops those).
        """
        path = pathlib.Path(self.origin)
        if path.name == "vtysh":
            return set()

        defun_file = os.path.relpath(self._spec["defun"]["file"], frr_top_src)
        defun_path = pathlib.Path(defun_file)

        if defun_path.parts[0] != "lib":
            if "." not in path.name:
                # daemons don't have dots in their filename
                return {"VTYSH_" + path.name.upper()}

            # loadable modules - use directory name to determine daemon
            return {"VTYSH_" + path.parts[-2].upper()}

        if defun_file in daemon_flags:
            return {daemon_flags[defun_file]}

        v6_cmd = "ipv6" in self.name
        if defun_file == "lib/plist.c":
            if v6_cmd:
                return {
                    "VTYSH_RIPNGD|VTYSH_OSPF6D|VTYSH_BGPD|VTYSH_ZEBRA|VTYSH_PIM6D|VTYSH_BABELD|VTYSH_ISISD|VTYSH_FABRICD"
                }
            return {
                "VTYSH_RIPD|VTYSH_OSPFD|VTYSH_BGPD|VTYSH_ZEBRA|VTYSH_PIMD|VTYSH_EIGRPD|VTYSH_BABELD|VTYSH_ISISD|VTYSH_FABRICD"
            }

        if defun_file == "lib/if_rmap.c":
            return {"VTYSH_RIPNGD"} if v6_cmd else {"VTYSH_RIPD"}

        return set()

    def __repr__(self):
        return "<CommandEntry %s: %r>" % (self.name, self.cmd)

    def register(self):
        """Track DEFUNs so each is only output once."""
        if not self._registered:
            self.all_defs.append(self)
            self._registered = True
        return self

    def merge(self, other, nodename):
        """
        Fold another definition of the same command (same node) into this one.

        Mismatches in command string, help text or hidden flag are reported
        through warn_loc(); the merged entry keeps the shorter (then
        lexically smaller) name and the union of both daemon sets.
        """
        if self._cmd_normalized != other._cmd_normalized:
            self.warn_loc(
                "command definition mismatch, first defined as:\n%r" % (self.cmd,),
                nodename=nodename,
            )
            other.warn_loc("later defined as:\n%r" % (other.cmd,), nodename=nodename)

        if self._spec["doc"] != other._spec["doc"]:
            self.warn_loc(
                "help string mismatch, first defined here (-)", nodename=nodename
            )
            other.warn_loc(
                "later defined here (+)\nnote: both commands define %r in same node (%s)"
                % (self.cmd, nodename),
                nodename=nodename,
            )

            d = difflib.Differ()
            for diffline in d.compare(self.doclines, other.doclines):
                if diffline.startswith("  "):
                    continue
                if diffline.startswith("+ "):
                    diffline = _fmt_green + diffline
                elif diffline.startswith("- "):
                    diffline = _fmt_red + diffline
                sys.stderr.write("\t" + diffline.rstrip("\n") + _fmt_clear + "\n")

        if self.hidden != other.hidden:
            self.warn_loc(
                "hidden flag mismatch, first %r here" % (self.hidden,),
                nodename=nodename,
            )
            other.warn_loc(
                "later %r here (+)\nnote: both commands define %r in same node (%s)"
                % (other.hidden, self.cmd, nodename),
                nodename=nodename,
            )

        # ensure name is deterministic regardless of input DEFUN order
        self.name = min([self.name, other.name], key=lambda i: (len(i), i))
        self.daemons.update(other.daemons)

    def get_def(self):
        """Return the DEFSH / DEFSH_HIDDEN C source text for this command."""
        doc = "\n".join(['\t"%s"' % c_escape(line) for line in self.doclines])
        defsh = "DEFSH_HIDDEN" if self.hidden else "DEFSH"

        # make daemon list deterministic
        daemons = set()
        for daemon in self.daemons:
            daemons.update(daemon.split("|"))
        daemon_str = "|".join(sorted(daemons))

        return """
%s (%s, %s_vtysh,
\t"%s",
%s)
""" % (
            defsh,
            daemon_str,
            self.name,
            c_escape(self.cmd),
            doc,
        )

    # accept slightly different command definitions that result in the same command
    re_collapse_ws = re.compile(r"\s+")
    re_remove_varnames = re.compile(r"\$[a-z][a-z0-9_]*")

    @classmethod
    def normalize_cmd(cls, cmd):
        """Strip, collapse whitespace runs and drop "$varname" suffixes."""
        cmd = cmd.strip()
        cmd = cls.re_collapse_ws.sub(" ", cmd)
        cmd = cls.re_remove_varnames.sub("", cmd)
        return cmd

    @classmethod
    def process(cls, nodes, name, origin, spec):
        """
        Add one (command, origin) xref entry to every node it is installed in.

        Skipped entirely for "nosh" commands, for vtysh's own commands and
        for commands that map to no daemon.
        """
        if "nosh" in spec.get("attrs", []):
            return
        if origin == "vtysh/vtysh":
            return

        if origin == "isisd/fabricd":
            # dirty workaround :(
            name = "fabricd_" + name

        entry = cls(origin, name, spec)
        if not entry.daemons:
            return

        for nodedata in spec.get("nodes", []):
            node = nodes[nodedata["node"]]
            if entry._cmd_normalized not in node:
                node[entry._cmd_normalized] = entry.register()
            else:
                node[entry._cmd_normalized].merge(
                    entry, nodes.nodename(nodedata["node"])
                )

    @classmethod
    def load(cls, xref):
        """Build and return a NodeDict from the "cli" section of xref data."""
        nodes = NodeDict()

        mgmtname = "mgmtd/libmgmt_be_nb.la"
        for cmd_name, origins in xref.get("cli", {}).items():
            # If mgmtd has a yang version of a CLI command, make it the only daemon
            # to handle it.  For now, daemons can still be compiling their cmds into the
            # binaries to allow for running standalone with CLI config files.  When they
            # do this they will also be present in the xref file, but we want to ignore
            # those in vtysh.
            if "yang" in origins.get(mgmtname, {}).get("attrs", []):
                CommandEntry.process(nodes, cmd_name, mgmtname, origins[mgmtname])
                continue

            for origin, spec in origins.items():
                CommandEntry.process(nodes, cmd_name, origin, spec)
        return nodes

    @classmethod
    def output_defs(cls, ofd):
        """Write the DEFSH block of every registered command, sorted by name."""
        for entry in sorted(cls.all_defs, key=lambda i: i.name):
            ofd.write(entry.get_def())

    @classmethod
    def output_install(cls, ofd, nodes):
        """Write vtysh_init_cmd() with one install_element() per node/command."""
        ofd.write("\nvoid vtysh_init_cmd(void)\n{\n")

        # sort on the name only; comparing the dict payloads on a tie would
        # raise TypeError
        for name, items in sorted(nodes.items_named(), key=lambda kv: kv[0]):
            for item in sorted(items.values(), key=lambda i: i.name):
                ofd.write("\tinstall_element(%s, &%s_vtysh);\n" % (name, item.name))

        ofd.write("}\n")

    @classmethod
    def run(cls, xref, ofd):
        """Generate the complete vtysh command C file from xref into ofd."""
        ofd.write(vtysh_cmd_head)

        NodeDict.load_nodenames()
        nodes = cls.load(xref)
        cls.output_defs(ofd)
        cls.output_install(ofd, nodes)


def main():
    """Standalone entry point: read an .xref file, write C code to stdout."""
    argp = argparse.ArgumentParser(description="FRR xref to vtysh defs")
    argp.add_argument(
        "xreffile", metavar="XREFFILE", type=str, help=".xref file to read"
    )
    argp.add_argument("-Werror", action="store_const", const=True)
    args = argp.parse_args()

    with open(args.xreffile, "r") as fd:
        data = json.load(fd)

    CommandEntry.run(data, sys.stdout)

    if args.Werror and CommandEntry.warn_counter:
        sys.exit(1)


if __name__ == "__main__":
    main()


# diff --git a/python/xrefstructs.json b/python/xrefstructs.json
# new file mode 100644
# index 0000000..25c48c9
# --- /dev/null
# +++ b/python/xrefstructs.json
# @@ -0,0 +1,140 @@
# (separate data file carried in the same patch; content below is unchanged)
{
  "cmd_element": {
    "fields": [
      {
        "name": "string",
        "type": "const char *"
      },
      {
        "name": "doc",
        "type": "const char *"
      },
      {
        "name": "daemon",
        "type": "int"
      },
      {
        "name": "attr",
        "type": "uint32_t"
      },
      {
        "name": "func",
        "type": "int *"
      },
      {
        "name": "name",
        "type": "const char *"
      },
      {
        "name": "xref",
        "type": "struct xref"
      }
    ]
  },
  "xref": {
    "fields": [
      {
        "name": "xrefdata",
        "type": "struct xrefdata *"
      },
      {
        "name": "type",
        "type": "enum xref_type"
      },
      {
        "name": "line",
        "type": "int"
      },
      {
        "name": "file",
        "type": "const char *"
      },
      {
        "name": "func",
        "type": "const char *"
      }
    ]
  },
  "xref_install_element": {
    "fields": [
      {
        "name": "xref",
        "type": "struct xref"
      },
      {
        "name": "cmd_element",
        "type": "const struct cmd_element *"
      },
      {
        "name": "node_type",
        "type": "enum node_type"
      }
    ]
  },
  "xref_logmsg": {
    "fields": [
      {
        "name": "xref",
        "type": "struct xref"
      },
      {
        "name": "fmtstring",
        "type": "const char *"
      },
      {
        "name": "priority",
        "type": "uint32_t"
      },
      {
        "name": "ec",
        "type": "uint32_t"
      },
      {
        "name": "args",
        "type": "const char *"
      }
    ]
  },
  "xref_threadsched": {
    "fields": [
      {
        "name": "xref",
        "type": "struct xref"
      },
      {
        "name": "funcname",
        "type": "const char *"
      },
      {
        "name": "dest",
        "type": "const char *"
      },
      {
        "name": "thread_type",
        "type": "uint32_t"
      }
    ]
  },
  "xrefdata": {
    "fields": [
      {
        "name": "xref",
        "type": "const struct xref *"
      },
      {
        "array": 16,
        "name": "uid",
        "type": "char"
      },
      {
        "name": "hashstr",
        "type": "const char *"
      },
      {
        "array": 2,
        "name": "hashu32",
        "type": "uint32_t"
      }
    ]
  }
}
\ No newline at end of file diff --git a/python/xrelfo.py b/python/xrelfo.py new file mode 100644 index 0000000..a40b19e --- /dev/null +++ b/python/xrelfo.py @@ -0,0 +1,526 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# FRR ELF xref extractor +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. + +import sys +import os +import struct +import re +import traceback + +json_dump_args = {} + +try: + import ujson as json + + json_dump_args["escape_forward_slashes"] = False +except ImportError: + import json + +import argparse + +from clippy.uidhash import uidhash +from clippy.elf import * +from clippy import frr_top_src, CmdAttr +from tiabwarfo import FieldApplicator +from xref2vtysh import CommandEntry + +try: + with open(os.path.join(frr_top_src, "python", "xrefstructs.json"), "r") as fd: + xrefstructs = json.load(fd) +except FileNotFoundError: + sys.stderr.write( + """ +The "xrefstructs.json" file (created by running tiabwarfo.py with the pahole +tool available) could not be found. It should be included with the sources. +""" + ) + sys.exit(1) + +# constants, need to be kept in sync manually... 

# xref type codes; dispatch keys for Xref.containers below.
XREFT_EVENTSCHED = 0x100
XREFT_LOGMSG = 0x200
XREFT_DEFUN = 0x300
XREFT_INSTALL_ELEMENT = 0x301

# LOG_*
priovals = {}
# one-character labels indexed by (priority & 7)
prios = ["0", "1", "2", "E", "W", "N", "I", "D"]


def _xref_location(xref):
    """Return the {"file", "line", "func"} dict describing an xref's location."""
    return {key: getattr(xref, key) for key in ("file", "line", "func")}


def _write_output(path, writer):
    """
    Write an output file via "<path>.tmp" and move it into place.

    writer is called with the open temporary file.  os.replace() overwrites
    an existing destination on every platform.
    """
    tmp_path = path + ".tmp"
    with open(tmp_path, "w") as fd:
        writer(fd)
    os.replace(tmp_path, path)


class XrelfoJson(object):
    """Default no-op dump/check/to_dict hooks for xref container classes."""

    def dump(self):
        pass

    def check(self, wopt):
        # generator that yields nothing
        yield from []

    def to_dict(self, refs):
        pass


class Xref(ELFDissectStruct, XrelfoJson):
    """
    A "struct xref" read from the ELF file.

    containers maps xref type codes to the class of the structure that
    embeds this xref as its "xref" member.
    """

    struct = "xref"
    fieldrename = {"type": "typ"}
    containers = {}

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self._container = None
        if self.xrefdata:
            self.xrefdata.ref_from(self, self.typ)

    def container(self):
        """Return (and cache) the enclosing structure, or None if type unknown."""
        if self._container is None and self.typ in self.containers:
            self._container = self.container_of(self.containers[self.typ], "xref")
        return self._container

    def check(self, *args, **kwargs):
        """Forward check() to the container, if container() resolved one."""
        if self._container:
            yield from self._container.check(*args, **kwargs)


class Xrefdata(ELFDissectStruct):
    """A "struct xrefdata"; the unique ID is recomputed rather than loaded."""

    struct = "xrefdata"

    # uid is all zeroes in the data loaded from ELF
    fieldrename = {"uid": "_uid"}

    def ref_from(self, xref, typ):
        """Remember the Xref that points at this data (typ is not used)."""
        self.xref = xref

    @property
    def uid(self):
        """Unique ID computed by uidhash(), or None when hashstr is None."""
        if self.hashstr is None:
            return None
        return uidhash(self.xref.file, self.hashstr, self.hashu32_0, self.hashu32_1)


class XrefPtr(ELFDissectStruct):
    """A single pointer to an Xref."""

    fields = [
        ("xref", "P", Xref),
    ]


class XrefThreadSched(ELFDissectStruct, XrelfoJson):
    """Container for XREFT_EVENTSCHED; contributes nothing to the output."""

    struct = "xref_threadsched"


Xref.containers[XREFT_EVENTSCHED] = XrefThreadSched


class XrefLogmsg(ELFDissectStruct, XrelfoJson):
    """Container for XREFT_LOGMSG: one log message call site."""

    struct = "xref_logmsg"

    def _warn_fmt(self, text):
        """
        Yield one ((file, line), message) tuple for a warning.

        Extra lines of text follow the location line in the message.
        """
        lines = text.split("\n")
        yield (
            (self.xref.file, self.xref.line),
            "%s:%d: %s (in %s())%s\n"
            % (
                self.xref.file,
                self.xref.line,
                lines[0],
                self.xref.func,
                "".join(["\n" + l for l in lines[1:]]),
            ),
        )

    # (regex, message) pairs applied to the format string
    fmt_regexes = [
        (re.compile(r"([\n\t]+)"), "error: log message contains tab or newline"),
        # (re.compile(r'^(\s+)'), 'warning: log message starts with whitespace'),
        (
            re.compile(r"^((?:warn(?:ing)?|error):\s*)", re.I),
            "warning: log message starts with severity",
        ),
    ]
    # (regex, message, condition) triples applied to the argument text
    arg_regexes = [
        # the (?<![\?:] ) avoids warning for x ? inet_ntop(...) : "(bla)"
        (
            re.compile(r"((?<![\?:] )inet_ntop\s*\(\s*(?:[AP]F_INET|2)\s*,)"),
            "cleanup: replace inet_ntop(AF_INET, ...) with %pI4",
            lambda s: True,
        ),
        (
            re.compile(r"((?<![\?:] )inet_ntop\s*\(\s*(?:[AP]F_INET6|10)\s*,)"),
            "cleanup: replace inet_ntop(AF_INET6, ...) with %pI6",
            lambda s: True,
        ),
        (
            # string split-up here is to not trigger "inet_ntoa forbidden"
            re.compile(r"((?<![\?:] )inet_" + r"ntoa)"),
            "cleanup: replace inet_" + "ntoa(...) with %pI4",
            lambda s: True,
        ),
        (
            re.compile(r"((?<![\?:] )ipaddr2str)"),
            "cleanup: replace ipaddr2str(...) with %pIA",
            lambda s: True,
        ),
        (
            re.compile(r"((?<![\?:] )prefix2str)"),
            "cleanup: replace prefix2str(...) with %pFX",
            lambda s: True,
        ),
        (
            re.compile(r"((?<![\?:] )prefix_mac2str)"),
            "cleanup: replace prefix_mac2str(...) with %pEA",
            lambda s: True,
        ),
        (
            re.compile(r"((?<![\?:] )sockunion2str)"),
            "cleanup: replace sockunion2str(...) with %pSU",
            lambda s: True,
        ),
        # (re.compile(r'^(\s*__(?:func|FUNCTION|PRETTY_FUNCTION)__\s*)'), 'error: debug message starts with __func__', lambda s: (s.priority & 7 == 7) ),
    ]

    def check(self, wopt):
        """
        Yield warnings for this log message.

        wopt.Wlog_format enables the format string checks, wopt.Wlog_args the
        argument checks.  A string that is None is skipped.
        """

        def fmt_msg(rex, itext):
            # On a terminal, highlight the matched parts (odd split indices,
            # since every regex has exactly one capture group).
            if sys.stderr.isatty():
                out = []
                for i, text in enumerate(rex.split(itext)):
                    if (i % 2) == 1:
                        out.append("\033[41;37;1m%s\033[m" % repr(text)[1:-1])
                    else:
                        out.append(repr(text)[1:-1])
                return "".join(out)
            return repr(itext)[1:-1]

        if wopt.Wlog_format and self.fmtstring is not None:
            for rex, msg in self.fmt_regexes:
                if not rex.search(self.fmtstring):
                    continue

                excerpt = fmt_msg(rex, self.fmtstring)
                yield from self._warn_fmt('%s: "%s"' % (msg, excerpt))

        if wopt.Wlog_args and self.args is not None:
            for rex, msg, cond in self.arg_regexes:
                if not cond(self):
                    continue
                if not rex.search(self.args):
                    continue

                excerpt = fmt_msg(rex, self.args)
                yield from self._warn_fmt(
                    '%s:\n\t"%s",\n\t%s' % (msg, repr(self.fmtstring)[1:-1], excerpt)
                )

    def dump(self):
        """Print a one-line summary of this log message to stdout."""
        print(
            "%-60s %s%s %-25s [EC %d] %s"
            % (
                "%s:%d %s()" % (self.xref.file, self.xref.line, self.xref.func),
                prios[self.priority & 7],
                priovals.get(self.priority & 0x30, " "),
                self.xref.xrefdata.uid,
                self.ec,
                self.fmtstring,
            )
        )

    def to_dict(self, xrelfo):
        """Append this log message to xrelfo["refs"] under its unique ID."""
        jsobj = _xref_location(self.xref)
        if self.ec != 0:
            jsobj["ec"] = self.ec
        jsobj["fmtstring"] = self.fmtstring
        jsobj["args"] = self.args
        jsobj["priority"] = self.priority & 7
        jsobj["type"] = "logmsg"
        jsobj["binary"] = self._elfsect._elfwrap.orig_filename

        if self.priority & 0x10:
            jsobj.setdefault("flags", []).append("errno")
        if self.priority & 0x20:
            jsobj.setdefault("flags", []).append("getaddrinfo")

        xrelfo["refs"].setdefault(self.xref.xrefdata.uid, []).append(jsobj)


Xref.containers[XREFT_LOGMSG] = XrefLogmsg


class CmdElement(ELFDissectStruct, XrelfoJson):
    """Container for XREFT_DEFUN: one CLI command definition."""

    struct = "cmd_element"

    def to_dict(self, xrelfo):
        """Record string, doc, attributes and DEFUN location in xrelfo["cli"]."""
        jsobj = (
            xrelfo["cli"]
            .setdefault(self.name, {})
            .setdefault(self._elfsect._elfwrap.orig_filename, {})
        )

        jsobj.update(
            {
                "string": self.string,
                "doc": self.doc,
            }
        )
        if self.attr:
            jsobj["attr"] = self.attr
            # also list every set flag by its lower-cased CmdAttr name
            for attrname in CmdAttr.__members__:
                if self.attr & CmdAttr[attrname]:
                    jsobj.setdefault("attrs", []).append(attrname.lower())

        jsobj["defun"] = _xref_location(self.xref)


Xref.containers[XREFT_DEFUN] = CmdElement


class XrefInstallElement(ELFDissectStruct, XrelfoJson):
    """Container for XREFT_INSTALL_ELEMENT: a command installed in a node."""

    struct = "xref_install_element"

    def to_dict(self, xrelfo):
        """Append the node and install location to the command's "nodes" list."""
        jsobj = (
            xrelfo["cli"]
            .setdefault(self.cmd_element.name, {})
            .setdefault(self._elfsect._elfwrap.orig_filename, {})
        )
        nodes = jsobj.setdefault("nodes", [])

        nodes.append(
            {
                "node": self.node_type,
                "install": _xref_location(self.xref),
            }
        )


Xref.containers[XREFT_INSTALL_ELEMENT] = XrefInstallElement

# shove in field defs
fieldapply = FieldApplicator(xrefstructs)
fieldapply.add(Xref)
fieldapply.add(Xrefdata)
fieldapply.add(XrefLogmsg)
fieldapply.add(XrefThreadSched)
fieldapply.add(CmdElement)
fieldapply.add(XrefInstallElement)
fieldapply()


class Xrelfo(dict):
    """
    Accumulated xref data: {"refs": {uid: [...]}, "cli": {name: {...}}}.

    Xref objects loaded from ELF files are additionally kept in _xrefs so
    check() can run over them.
    """

    def __init__(self):
        super().__init__(
            {
                "refs": {},
                "cli": {},
            }
        )
        self._xrefs = []

    def load_file(self, filename):
        """
        Load one input file, whatever its kind.

        .la/.lo libtool files are resolved to the object they name; "#!"
        scripts are retried under ".libs/"; ELF and JSON files are loaded
        directly.  Raises ValueError for unusable libtool files and for
        unrecognized file types.
        """
        orig_filename = filename
        if filename.endswith(".la") or filename.endswith(".lo"):
            with open(filename, "r") as fd:
                for line in fd:
                    line = line.strip()
                    if line.startswith("#") or line == "" or "=" not in line:
                        continue

                    var, val = line.split("=", 1)
                    if var not in ["library_names", "pic_object"]:
                        continue
                    if val.startswith("'") or val.startswith('"'):
                        val = val[1:-1]

                    if var == "pic_object":
                        filename = os.path.join(os.path.dirname(filename), val)
                        break

                    names = val.split()
                    if not names:
                        # library_names='' - nothing usable on this line;
                        # the for/else below reports it if nothing else
                        # matches either
                        continue
                    filename = os.path.join(
                        os.path.dirname(filename), ".libs", names[0]
                    )
                    break
                else:
                    raise ValueError(
                        'could not process libtool file "%s"' % orig_filename
                    )

        while True:
            with open(filename, "rb") as fd:
                hdr = fd.read(4)

            if hdr == b"\x7fELF":
                self.load_elf(filename, orig_filename)
                return

            if hdr[:2] == b"#!":
                # script in place of the binary; retry with ".libs/<name>"
                path, name = os.path.split(filename)
                filename = os.path.join(path, ".libs", name)
                continue

            if hdr[:1] == b"{":
                with open(filename, "r") as fd:
                    self.load_json(fd)
                return

            raise ValueError("cannot determine file type for %s" % (filename))

    def load_elf(self, filename, orig_filename):
        """
        Read all xrefs from an ELF file and merge them into this object.

        The xref pointer array is located through the "FRRouting"/"XREF"
        note if present, otherwise through the "xref_array" section.
        Raises ValueError if the file has neither.  Returns the
        ELFDissectFile.
        """
        edf = ELFDissectFile(filename)
        edf.orig_filename = orig_filename

        note = edf._elffile.find_note("FRRouting", "XREF")
        if note is not None:
            endian = ">" if edf._elffile.bigendian else "<"
            mem = edf._elffile[note]
            # the note holds two values (start, end) which are offset by the
            # note's start and, for end, the size of the first value
            if edf._elffile.elfclass == 64:
                start, end = struct.unpack(endian + "QQ", mem)
                start += note.start
                end += note.start + 8
            else:
                start, end = struct.unpack(endian + "II", mem)
                start += note.start
                end += note.start + 4

            ptrs = edf.iter_data(XrefPtr, slice(start, end))

        else:
            xrefarray = edf.get_section("xref_array")
            if xrefarray is None:
                raise ValueError("file has neither xref note nor xref_array section")

            ptrs = xrefarray.iter_data(XrefPtr)

        for ptr in ptrs:
            if ptr.xref is None:
                print("NULL xref")
                continue
            self._xrefs.append(ptr.xref)

            container = ptr.xref.container()
            if container is None:
                continue
            container.to_dict(self)

        return edf

    def load_json(self, fd):
        """
        Merge previously written JSON output into this object.

        "refs" items are appended unless an equal item is already present;
        "cli" entries are merged per command with dict.update().  Returns
        the parsed data.
        """
        data = json.load(fd)
        for uid, items in data["refs"].items():
            myitems = self["refs"].setdefault(uid, [])
            for item in items:
                if item in myitems:
                    continue
                myitems.append(item)

        for cmd, items in data["cli"].items():
            self["cli"].setdefault(cmd, {}).update(items)

        return data

    def check(self, checks):
        """Yield the warnings of every xref loaded from ELF files."""
        for xref in self._xrefs:
            yield from xref.check(checks)


def main():
    """Parse the command line and run _main(), optionally under cProfile."""
    argp = argparse.ArgumentParser(description="FRR xref ELF extractor")
    argp.add_argument("-o", dest="output", type=str, help="write JSON output")
    argp.add_argument("--out-by-file", type=str, help="write by-file JSON output")
    argp.add_argument("-c", dest="vtysh_cmds", type=str, help="write vtysh_cmd.c")
    argp.add_argument("-Wlog-format", action="store_const", const=True)
    argp.add_argument("-Wlog-args", action="store_const", const=True)
    argp.add_argument("-Werror", action="store_const", const=True)
    argp.add_argument("--profile", action="store_const", const=True)
    argp.add_argument(
        "binaries",
        metavar="BINARY",
        nargs="+",
        type=str,
        help="files to read (ELF files or libtool objects)",
    )
    args = argp.parse_args()

    if args.profile:
        import cProfile

        cProfile.runctx("_main(args)", globals(), {"args": args}, sort="cumtime")
    else:
        _main(args)


def _main(args):
    """
    Load all inputs, emit warnings and write the requested output files.

    Exits with status 1 if any input failed to load, if -Werror is set and
    a check produced warnings, or (after writing vtysh_cmd.c) if -Werror is
    set and CommandEntry counted warnings.
    """
    errors = 0
    xrelfo = Xrelfo()

    for fn in args.binaries:
        try:
            xrelfo.load_file(fn)
        except Exception:
            # report and keep going so every broken input is listed; unlike
            # a bare "except:", this lets Ctrl-C / SystemExit through
            errors += 1
            sys.stderr.write("while processing %s:\n" % (fn))
            traceback.print_exc()

    # With no -W option (other than -Werror) enabled, the checks yield
    # nothing, so only walk the xrefs when at least one is switched on.
    checks_enabled = any(
        getattr(args, option)
        for option in vars(args)
        if option.startswith("W") and option != "Werror"
    )
    if checks_enabled:
        checks = sorted(xrelfo.check(args))
        sys.stderr.write("".join([c[-1] for c in checks]))

        if args.Werror and checks:
            errors += 1

    refs = xrelfo["refs"]

    # print (in red) every unique ID shared by differing format strings
    for uid, locs in refs.items():
        if len({loc["fmtstring"] for loc in locs}) != 1:
            print("\033[31;1m%s\033[m" % uid)

    out = xrelfo
    outbyfile = {}
    for locs in refs.values():
        for loc in locs:
            entry = dict(loc)
            outbyfile.setdefault(entry.pop("file"), []).append(entry)

    for file_locs in outbyfile.values():
        file_locs.sort(key=lambda x: x["line"])

    if errors:
        sys.exit(1)

    if args.output:
        _write_output(
            args.output,
            lambda fd: json.dump(
                out, fd, indent=2, sort_keys=True, **json_dump_args
            ),
        )

    if args.out_by_file:
        _write_output(
            args.out_by_file,
            lambda fd: json.dump(
                outbyfile, fd, indent=2, sort_keys=True, **json_dump_args
            ),
        )

    if args.vtysh_cmds:
        _write_output(args.vtysh_cmds, lambda fd: CommandEntry.run(out, fd))
        if args.Werror and CommandEntry.warn_counter:
            sys.exit(1)


if __name__ == "__main__":
    main()