diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-09 13:16:35 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-09 13:16:35 +0000 |
commit | e2bbf175a2184bd76f6c54ccf8456babeb1a46fc (patch) | |
tree | f0b76550d6e6f500ada964a3a4ee933a45e5a6f1 /python/clippy | |
parent | Initial commit. (diff) | |
download | frr-e2bbf175a2184bd76f6c54ccf8456babeb1a46fc.tar.xz frr-e2bbf175a2184bd76f6c54ccf8456babeb1a46fc.zip |
Adding upstream version 9.1.upstream/9.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'python/clippy')
-rw-r--r-- | python/clippy/__init__.py | 92 | ||||
-rw-r--r-- | python/clippy/elf.py | 613 | ||||
-rw-r--r-- | python/clippy/uidhash.py | 61 |
3 files changed, 766 insertions, 0 deletions
diff --git a/python/clippy/__init__.py b/python/clippy/__init__.py new file mode 100644 index 0000000..60119fb --- /dev/null +++ b/python/clippy/__init__.py @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# FRR CLI preprocessor +# +# Copyright (C) 2017 David Lamparter for NetDEF, Inc. + +import os, stat + +try: + from enum import IntFlag as _IntFlag +except ImportError: + # python <3.6 + from enum import IntEnum as _IntFlag # type: ignore + +import _clippy +from _clippy import ( + parse, + Graph, + GraphNode, + CMD_ATTR_YANG, + CMD_ATTR_HIDDEN, + CMD_ATTR_DEPRECATED, + CMD_ATTR_NOSH, +) + + +frr_top_src = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +) + + +def graph_iterate(graph): + """iterator yielding all nodes of a graph + + nodes arrive in input/definition order, graph circles are avoided. + """ + + queue = [(graph.first(), frozenset(), 0)] + while len(queue) > 0: + node, stop, depth = queue.pop(0) + yield node, depth + + join = node.join() + if join is not None: + queue.insert(0, (join, stop.union(frozenset([node])), depth)) + join = frozenset([join]) + + stop = join or stop + nnext = node.next() + for n in reversed(nnext): + if n not in stop and n is not node: + queue.insert(0, (n, stop, depth + 1)) + + +def dump(graph): + """print out clippy.Graph""" + + for i, depth in graph_iterate(graph): + print("\t%s%s %r" % (" " * (depth * 2), i.type, i.text)) + + +def wrdiff(filename, buf, reffiles=[]): + """write buffer to file if contents changed""" + + expl = "" + if hasattr(buf, "getvalue"): + buf = buf.getvalue() + old = None + try: + old = open(filename, "r").read() + except: + pass + if old == buf: + for reffile in reffiles: + # ensure output timestamp is newer than inputs, for make + reftime = os.stat(reffile)[stat.ST_MTIME] + outtime = os.stat(filename)[stat.ST_MTIME] + if outtime <= reftime: + os.utime(filename, (reftime + 1, reftime + 1)) + # sys.stderr.write('%s unchanged, not written\n' % (filename)) + return + + newname = "%s.new-%d" % (filename, os.getpid()) + with open(newname, "w") as out: + out.write(buf) + os.rename(newname, filename) + + +class CmdAttr(_IntFlag): + YANG = CMD_ATTR_YANG + HIDDEN = CMD_ATTR_HIDDEN + DEPRECATED = CMD_ATTR_DEPRECATED + NOSH = CMD_ATTR_NOSH diff --git a/python/clippy/elf.py b/python/clippy/elf.py new file mode 100644 index 0000000..cc442ee --- /dev/null +++ b/python/clippy/elf.py @@ -0,0 +1,613 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# FRR libelf wrapper +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. + +""" +Wrapping layer and additional utility around _clippy.ELFFile. + +Essentially, the C bits have the low-level ELF access bits that should be +fast while this has the bits that string everything together (and would've +been a PITA to do in C.) + +Surprisingly - or maybe through proper engineering - this actually works +across architecture, word size and even endianness boundaries. Both the C +module (through GElf_*) and this code (cf. struct.unpack format mangling +in ELFDissectStruct) will take appropriate measures to flip and resize +fields as needed. +""" + +import struct +from collections import OrderedDict +from weakref import WeakValueDictionary + +from _clippy import ELFFile, ELFAccessError + +# +# data access +# + + +class ELFNull(object): + """ + NULL pointer, returned instead of ELFData + """ + + def __init__(self): + self.symname = None + self._dstsect = None + + def __repr__(self): + return "<ptr: NULL>" + + def __hash__(self): + return hash(None) + + def get_string(self): + return None + + +class ELFUnresolved(object): + """ + Reference to an unresolved external symbol, returned instead of ELFData + + :param symname: name of the referenced symbol + :param addend: offset added to the symbol, normally zero + """ + + def __init__(self, symname, addend): + self.addend = addend + self.symname = symname + self._dstsect = None + + def __repr__(self): + return "<unresolved: %s+%d>" % (self.symname, self.addend) + + def __hash__(self): + return hash((self.symname, self.addend)) + + +class ELFData(object): + """ + Actual data somewhere in the ELF file. + + :type dstsect: ELFSubset + :param dstsect: container data area (section or entire file) + :param dstoffs: byte offset into dstsect + :param dstlen: byte size of object, or None if unknown, open-ended or string + """ + + def __init__(self, dstsect, dstoffs, dstlen): + self._dstsect = dstsect + self._dstoffs = dstoffs + self._dstlen = dstlen + self.symname = None + + def __repr__(self): + return "<ptr: %s+0x%05x/%d>" % ( + self._dstsect.name, + self._dstoffs, + self._dstlen or -1, + ) + + def __hash__(self): + return hash((self._dstsect, self._dstoffs)) + + def get_string(self): + """ + Interpret as C string / null terminated UTF-8 and get the actual text. + """ + try: + return self._dstsect[self._dstoffs : str].decode("UTF-8") + except: + import pdb + + pdb.set_trace() + + def get_data(self, reflen): + """ + Interpret as some structure (and check vs. expected length) + + :param reflen: expected size of the object, compared against actual + size (which is only known in rare cases, mostly when directly + accessing a symbol since symbols have their destination object + size recorded) + """ + if self._dstlen is not None and self._dstlen != reflen: + raise ValueError( + "symbol size mismatch (got %d, expected %d)" % (self._dstlen, reflen) + ) + return self._dstsect[self._dstoffs : self._dstoffs + reflen] + + def offset(self, offs, within_symbol=False): + """ + Get another ELFData at an offset + + :param offs: byte offset, can be negative (e.g. in container_of) + :param within_symbol: retain length information + """ + if self._dstlen is None or not within_symbol: + return ELFData(self._dstsect, self._dstoffs + offs, None) + else: + return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs) + + +# +# dissection data items +# + + +class ELFDissectData(object): + """ + Common bits for ELFDissectStruct and ELFDissectUnion + """ + + def __init__(self): + self._data = None + self.elfclass = None + + def __len__(self): + """ + Used for boolean evaluation, e.g. "if struct: ..." + """ + return not ( + isinstance(self._data, ELFNull) or isinstance(self._data, ELFUnresolved) + ) + + def container_of(self, parent, fieldname): + """ + Assume this struct is embedded in a larger struct and get at the larger + + Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)`` + + :param parent: class (not instance) of the larger struct + :param fieldname: fieldname that refers back to this + :returns: instance of parent, with fieldname set to this object + """ + offset = 0 + if not hasattr(parent, "_efields"): + parent._setup_efields() + + for field in parent._efields[self.elfclass]: + if field[0] == fieldname: + break + spec = field[1] + if spec == "P": + spec = "I" if self.elfclass == 32 else "Q" + offset += struct.calcsize(spec) + else: + raise AttributeError("%r not found in %r.fields" % (fieldname, parent)) + + return parent(self._data.offset(-offset), replace={fieldname: self}) + + +class ELFDissectStruct(ELFDissectData): + """ + Decode and provide access to a struct somewhere in the ELF file + + Handles pointers and strings somewhat nicely. Create a subclass for each + struct that is to be accessed, and give a field list in a "fields" + class-member. + + :param dataptr: ELFData referring to the data bits to decode. + :param parent: where this was instantiated from; only for reference, has + no functional impact. + :param replace: substitute data values for specific fields. Used by + `container_of` to replace the inner struct when creating the outer + one. + + .. attribute:: fields + + List of tuples describing the struct members. Items can be: + - ``('name', ELFDissectData)`` - directly embed another struct + - ``('name', 'I')`` - simple data types; second item for struct.unpack + - ``('name', 'I', None)`` - field to ignore + - ``('name', 'P', str)`` - pointer to string + - ``('name', 'P', ELFDissectData)`` - pointer to another struct + + ``P`` is added as unpack format for pointers (sized appropriately for + the ELF file.) + + Refer to tiabwarfo.py for extracting this from ``pahole``. + + TBD: replace tuples with a class. + + .. attribute:: fieldrename + + Dictionary to rename fields, useful if fields comes from tiabwarfo.py. + """ + + class Pointer(object): + """ + Quick wrapper for pointers to further structs + + This is just here to avoid going into infinite loops when loading + structs that have pointers to each other (e.g. struct xref <--> + struct xrefdata.) The pointer destination is only instantiated when + actually accessed. + """ + + def __init__(self, cls, ptr): + self.cls = cls + self.ptr = ptr + + def __repr__(self): + return "<Pointer:%s %r>" % (self.cls.__name__, self.ptr) + + def __call__(self): + if isinstance(self.ptr, ELFNull): + return None + return self.cls(self.ptr) + + def __new__(cls, dataptr, parent=None, replace=None): + if dataptr._dstsect is None: + return super().__new__(cls) + + obj = dataptr._dstsect._pointers.get((cls, dataptr)) + if obj is not None: + return obj + obj = super().__new__(cls) + dataptr._dstsect._pointers[(cls, dataptr)] = obj + return obj + + replacements = "lLnN" + + @classmethod + def _preproc_structspec(cls, elfclass, spec): + elfbits = elfclass + + if hasattr(spec, "calcsize"): + spec = "%ds" % (spec.calcsize(elfclass),) + + if elfbits == 32: + repl = ["i", "I"] + else: + repl = ["q", "Q"] + for c in cls.replacements: + spec = spec.replace(c, repl[int(c.isupper())]) + return spec + + @classmethod + def _setup_efields(cls): + cls._efields = {} + cls._esize = {} + for elfclass in [32, 64]: + cls._efields[elfclass] = [] + size = 0 + for f in cls.fields: + newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:] + cls._efields[elfclass].append(newf) + size += struct.calcsize(newf[1]) + cls._esize[elfclass] = size + + def __init__(self, dataptr, parent=None, replace=None): + if not hasattr(self.__class__, "_efields"): + self._setup_efields() + + self._fdata = None + self._data = dataptr + self._parent = parent + self.symname = dataptr.symname + if isinstance(dataptr, ELFNull) or isinstance(dataptr, ELFUnresolved): + self._fdata = {} + return + + self._elfsect = dataptr._dstsect + self.elfclass = self._elfsect._elffile.elfclass + self.offset = dataptr._dstoffs + + pspecl = [f[1] for f in self._efields[self.elfclass]] + + # need to correlate output from struct.unpack with extra metadata + # about the particular fields, so note down byte offsets (in locs) + # and tuple indices of pointers (in ptrs) + pspec = "" + locs = {} + ptrs = set() + + for idx, spec in enumerate(pspecl): + if spec == "P": + ptrs.add(idx) + spec = self._elfsect.ptrtype + + locs[idx] = struct.calcsize(pspec) + pspec = pspec + spec + + self._total_size = struct.calcsize(pspec) + + def replace_ptrs(v): + idx, val = v[0], v[1] + if idx not in ptrs: + return val + return self._elfsect.pointer(self.offset + locs[idx]) + + data = dataptr.get_data(struct.calcsize(pspec)) + unpacked = struct.unpack(self._elfsect.endian + pspec, data) + unpacked = list(map(replace_ptrs, enumerate(unpacked))) + self._fraw = unpacked + self._fdata = OrderedDict() + replace = replace or {} + + for i, item in enumerate(unpacked): + name = self.fields[i][0] + if name is None: + continue + + if name in replace: + self._fdata[name] = replace[name] + continue + + if isinstance(self.fields[i][1], type) and issubclass( + self.fields[i][1], ELFDissectData + ): + dataobj = self.fields[i][1](dataptr.offset(locs[i]), self) + self._fdata[name] = dataobj + continue + if len(self.fields[i]) == 3: + if self.fields[i][2] == str: + self._fdata[name] = item.get_string() + continue + elif self.fields[i][2] is None: + pass + elif issubclass(self.fields[i][2], ELFDissectData): + cls = self.fields[i][2] + dataobj = self.Pointer(cls, item) + self._fdata[name] = dataobj + continue + + self._fdata[name] = item + + def __getattr__(self, attrname): + if attrname not in self._fdata: + raise AttributeError(attrname) + if isinstance(self._fdata[attrname], self.Pointer): + self._fdata[attrname] = self._fdata[attrname]() + return self._fdata[attrname] + + def __repr__(self): + if not isinstance(self._data, ELFData): + return "<%s: %r>" % (self.__class__.__name__, self._data) + return "<%s: %s>" % ( + self.__class__.__name__, + ", ".join(["%s=%r" % t for t in self._fdata.items()]), + ) + + @classmethod + def calcsize(cls, elfclass): + """ + Sum up byte size of this struct + + Wraps struct.calcsize with some extra features. + """ + if not hasattr(cls, "_efields"): + cls._setup_efields() + + pspec = "".join([f[1] for f in cls._efields[elfclass]]) + + ptrtype = "I" if elfclass == 32 else "Q" + pspec = pspec.replace("P", ptrtype) + + return struct.calcsize(pspec) + + +class ELFDissectUnion(ELFDissectData): + """ + Decode multiple structs in the same place. + + Not currently used (and hence not tested.) Worked at some point but not + needed anymore and may be borked now. Remove this comment when using. + """ + + members = {} + + def __init__(self, dataptr, parent=None): + self._dataptr = dataptr + self._parent = parent + self.members = [] + for name, membercls in self.__class__.members: + item = membercls(dataptr, parent) + self.members.append(item) + setattr(self, name, item) + + def __repr__(self): + return "<%s: %s>" % ( + self.__class__.__name__, + ", ".join([repr(i) for i in self.members]), + ) + + @classmethod + def calcsize(cls, elfclass): + return max([member.calcsize(elfclass) for name, member in cls.members]) + + +# +# wrappers for spans of ELF data +# + + +class ELFSubset(object): + """ + Common abstract base for section-level and file-level access. + """ + + def __init__(self): + super().__init__() + + self.name = None + self._obj = None + self._elffile = None + self.ptrtype = None + self.endian = None + self._pointers = WeakValueDictionary() + + def _wrap_data(self, data, dstsect): + raise NotImplementedError() + + def __hash__(self): + return hash(self.name) + + def __getitem__(self, k): + """ + Read data from slice + + Subscript **must** be a slice; a simple index will not return a byte + but rather throw an exception. Valid slice syntaxes are defined by + the C module: + + - `this[123:456]` - extract specific range + - `this[123:str]` - extract until null byte. The slice stop value is + the `str` type (or, technically, `unicode`.) + """ + return self._obj[k] + + def getreloc(self, offset): + """ + Check for a relocation record at the specified offset. + """ + return self._obj.getreloc(offset) + + def iter_data(self, scls, slice_=slice(None)): + """ + Assume an array of structs present at a particular slice and decode + + :param scls: ELFDissectData subclass for the struct + :param slice_: optional range specification + """ + size = scls.calcsize(self._elffile.elfclass) + + offset = slice_.start or 0 + stop = slice_.stop or self._obj.len + if stop < 0: + stop = self._obj.len - stop + + while offset < stop: + yield scls(ELFData(self, offset, size)) + offset += size + + def pointer(self, offset): + """ + Try to dereference a pointer value + + This checks whether there's a relocation at the given offset and + uses that; otherwise (e.g. in a non-PIE executable where the pointer + is already resolved by the linker) the data at the location is used. + + :param offset: byte offset from beginning of section, + or virtual address in file + :returns: ELFData wrapping pointed-to object + """ + + ptrsize = struct.calcsize(self.ptrtype) + data = struct.unpack( + self.endian + self.ptrtype, self[offset : offset + ptrsize] + )[0] + + reloc = self.getreloc(offset) + dstsect = None + if reloc: + # section won't be available in whole-file operation + dstsect = reloc.getsection(data) + addend = reloc.r_addend + + if reloc.relative: + # old-style ELF REL instead of RELA, not well-tested + addend += data + + if reloc.unresolved and reloc.symvalid: + return ELFUnresolved(reloc.symname, addend) + elif reloc.symvalid: + data = addend + reloc.st_value + else: + data = addend + + # 0 could technically be a valid pointer for a shared library, + # since libraries may use 0 as default virtual start address (it'll + # be adjusted on loading) + # That said, if the library starts at 0, that's where the ELF header + # would be so it's still an invalid pointer. + if data == 0 and dstsect == None: + return ELFNull() + + # wrap_data is different between file & section + return self._wrap_data(data, dstsect) + + +class ELFDissectSection(ELFSubset): + """ + Access the contents of an ELF section like ``.text`` or ``.data`` + + :param elfwrap: ELFDissectFile wrapper for the file + :param idx: section index in section header table + :param section: section object from C module + """ + + def __init__(self, elfwrap, idx, section): + super().__init__() + + self._elfwrap = elfwrap + self._elffile = elfwrap._elffile + self._idx = idx + self._section = self._obj = section + self.name = section.name + self.ptrtype = elfwrap.ptrtype + self.endian = elfwrap.endian + + def _wrap_data(self, data, dstsect): + if dstsect is None: + dstsect = self._elfwrap._elffile.get_section_addr(data) + offs = data - dstsect.sh_addr + dstsect = self._elfwrap.get_section(dstsect.idx) + return ELFData(dstsect, offs, None) + + +class ELFDissectFile(ELFSubset): + """ + Access the contents of an ELF file. + + Note that offsets for array subscript and relocation/pointer access are + based on the file's virtual address space and are NOT offsets to the + start of the file on disk! + + (Shared libraries frequently have a virtual address space starting at 0, + but non-PIE executables have an architecture specific default loading + address like 0x400000 on x86. + + :param filename: ELF file to open + """ + + def __init__(self, filename): + super().__init__() + + self.name = filename + self._elffile = self._obj = ELFFile(filename) + self._sections = {} + + self.ptrtype = "I" if self._elffile.elfclass == 32 else "Q" + self.endian = ">" if self._elffile.bigendian else "<" + + @property + def _elfwrap(self): + return self + + def _wrap_data(self, data, dstsect): + return ELFData(self, data, None) + + def get_section(self, secname): + """ + Look up section by name or index + """ + if isinstance(secname, int): + sh_idx = secname + section = self._elffile.get_section_idx(secname) + else: + section = self._elffile.get_section(secname) + + if section is None: + return None + + sh_idx = section.idx + + if sh_idx not in self._sections: + self._sections[sh_idx] = ELFDissectSection(self, sh_idx, section) + + return self._sections[sh_idx] diff --git a/python/clippy/uidhash.py b/python/clippy/uidhash.py new file mode 100644 index 0000000..73570b2 --- /dev/null +++ b/python/clippy/uidhash.py @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# xref unique ID hash calculation +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. + +import struct +from hashlib import sha256 + + +def bititer(data, bits, startbit=True): + """ + just iterate the individual bits out from a bytes object + + if startbit is True, an '1' bit is inserted at the very beginning + goes <bits> at a time, starts at LSB. + """ + bitavail, v = 0, 0 + if startbit and len(data) > 0: + v = data.pop(0) + yield (v & ((1 << bits) - 1)) | (1 << (bits - 1)) + bitavail = 9 - bits + v >>= bits - 1 + + while len(data) > 0: + while bitavail < bits: + v |= data.pop(0) << bitavail + bitavail += 8 + yield v & ((1 << bits) - 1) + bitavail -= bits + v >>= bits + + +def base32c(data): + """ + Crockford base32 with extra dashes + """ + chs = "0123456789ABCDEFGHJKMNPQRSTVWXYZ" + o = "" + if type(data) == str: + data = [ord(v) for v in data] + else: + data = list(data) + for i, bits in enumerate(bititer(data, 5)): + if i == 5: + o = o + "-" + elif i == 10: + break + o = o + chs[bits] + return o + + +def uidhash(filename, hashstr, hashu32a, hashu32b): + """ + xref Unique ID hash used in FRRouting + """ + filename = "/".join(filename.rsplit("/")[-2:]) + + hdata = filename.encode("UTF-8") + hashstr.encode("UTF-8") + hdata += struct.pack(">II", hashu32a, hashu32b) + i = sha256(hdata).digest() + return base32c(i) |