3 files changed, 766 insertions, 0 deletions
diff --git a/python/clippy/__init__.py b/python/clippy/__init__.py
new file mode 100644
index 0000000..60119fb
--- /dev/null
+++ b/python/clippy/__init__.py
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# FRR CLI preprocessor
+#
+# Copyright (C) 2017  David Lamparter for NetDEF, Inc.
+
+import os, stat
+
+try:
+    from enum import IntFlag as _IntFlag
+except ImportError:
+    # python <3.6
+    from enum import IntEnum as _IntFlag  # type: ignore
+
+import _clippy
+from _clippy import (
+    parse,
+    Graph,
+    GraphNode,
+    CMD_ATTR_YANG,
+    CMD_ATTR_HIDDEN,
+    CMD_ATTR_DEPRECATED,
+    CMD_ATTR_NOSH,
+)
+
+# three dirname() steps up from this file's absolute path
+frr_top_src = os.path.dirname(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+)
+
+
+def graph_iterate(graph):
+    """Walk a graph depth-first, yielding ``(node, depth)`` for each node.
+
+    Nodes come out in input/definition order; a fork's join node is held
+    back until its branches are done, and graph circles are avoided.
+    """
+    # LIFO work list; the top of the stack is the end of the list
+    pending = [(graph.first(), frozenset(), 0)]
+    while pending:
+        current, barrier, level = pending.pop()
+        yield current, level
+
+        joined = current.join()
+        if joined is not None:
+            # resume at the join node, same depth, after all branches
+            pending.append((joined, barrier | frozenset([current]), level))
+            barrier = frozenset([joined])
+
+        for succ in reversed(current.next()):
+            if succ not in barrier and succ is not current:
+                pending.append((succ, barrier, level + 1))
+
+
+def dump(graph):
+    """print every node of a clippy.Graph, indented by its depth"""
+    for node, level in graph_iterate(graph):
+        indent = "  " * (level * 2)
+        print("\t%s%s %r" % (indent, node.type, node.text))
+
+
+def wrdiff(filename, buf, reffiles=()):
+    """write buf to filename, but only if the contents actually changed
+
+    buf is a string or an object with getvalue() (e.g. StringIO).  If the
+    file already has this content it is left alone, except that its mtime
+    is pushed past that of every file in reffiles (to keep make happy.)
+    """
+    if hasattr(buf, "getvalue"):
+        buf = buf.getvalue()
+    try:
+        with open(filename, "r") as oldfile:
+            old = oldfile.read()
+    except (OSError, ValueError):
+        old = None  # missing, unreadable or undecodable: write it anew
+    if old == buf:
+        for reffile in reffiles:
+            reftime = os.stat(reffile)[stat.ST_MTIME]
+            if os.stat(filename)[stat.ST_MTIME] <= reftime:
+                os.utime(filename, (reftime + 1, reftime + 1))
+        return
+    # write under a per-process temporary name, then rename into place
+    newname = "%s.new-%d" % (filename, os.getpid())
+    with open(newname, "w") as out:
+        out.write(buf)
+    os.rename(newname, filename)
+
+# Command attribute bits; values are the CMD_ATTR_* constants from _clippy.
+class CmdAttr(_IntFlag):
+    YANG = CMD_ATTR_YANG
+    HIDDEN = CMD_ATTR_HIDDEN
+    DEPRECATED = CMD_ATTR_DEPRECATED
+    NOSH = CMD_ATTR_NOSH
diff --git a/python/clippy/elf.py b/python/clippy/elf.py
new file mode 100644
index 0000000..cc442ee
--- /dev/null
+++ b/python/clippy/elf.py
@@ -0,0 +1,613 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# FRR libelf wrapper
+#
+# Copyright (C) 2020  David Lamparter for NetDEF, Inc.
+
+"""
+Wrapping layer and additional utility around _clippy.ELFFile.
+
+Essentially, the C bits have the low-level ELF access bits that should be
+fast while this has the bits that string everything together (and would've
+been a PITA to do in C.)
+
+Surprisingly - or maybe through proper engineering - this actually works
+across architecture, word size and even endianness boundaries.  Both the C
+module (through GElf_*) and this code (cf. struct.unpack format mangling
+in ELFDissectStruct) will take appropriate measures to flip and resize
+fields as needed.
+"""
+
+import struct
+from collections import OrderedDict
+from weakref import WeakValueDictionary
+
+from _clippy import ELFFile, ELFAccessError
+
+#
+# data access
+#
+
+
+class ELFNull(object):
+    """
+    Stand-in for a NULL pointer, handed out in place of an ELFData
+    """
+
+    def __init__(self):
+        # a NULL pointer has neither a symbol name nor a containing section
+        self.symname = self._dstsect = None
+
+    def __repr__(self):
+        return "<ptr: NULL>"
+
+    def __hash__(self):
+        return hash(None)
+
+    def get_string(self):
+        return None
+
+
+class ELFUnresolved(object):
+    """
+    Pointer to an external symbol that is unresolved; handed out in place
+    of an ELFData
+
+    :param symname: name of the symbol being referenced
+    :param addend:  offset applied on top of the symbol (normally zero)
+    """
+
+    def __init__(self, symname, addend):
+        self.symname, self.addend = symname, addend
+        self._dstsect = None  # no containing section
+
+    def __repr__(self):
+        return "<unresolved: %s+%d>" % (self.symname, self.addend)
+
+    def __hash__(self):
+        return hash((self.symname, self.addend))
+
+
+class ELFData(object):
+    """
+    Actual data somewhere in the ELF file.
+
+    :type dstsect:  ELFSubset
+    :param dstsect: container data area (section or entire file)
+    :param dstoffs: byte offset into dstsect
+    :param dstlen:  byte size of object, or None if unknown, open-ended or string
+    """
+
+    def __init__(self, dstsect, dstoffs, dstlen):
+        self._dstsect = dstsect
+        self._dstoffs = dstoffs
+        self._dstlen = dstlen
+        self.symname = None
+
+    def __repr__(self):
+        return "<ptr: %s+0x%05x/%d>" % (
+            self._dstsect.name,
+            self._dstoffs,
+            self._dstlen or -1,
+        )
+
+    def __hash__(self):
+        return hash((self._dstsect, self._dstoffs))
+
+    def get_string(self):
+        """
+        Interpret as C string / null terminated UTF-8 and get the actual text.
+
+        :returns: the decoded string
+        Errors from reading the data or from decoding it as UTF-8 are raised
+        to the caller.
+        """
+        # a slice with ``str`` as its stop value reads up to the null byte
+        return self._dstsect[self._dstoffs : str].decode("UTF-8")
+
+    def get_data(self, reflen):
+        """
+        Interpret as some structure (and check vs. expected length)
+
+        :param reflen: expected size of the object, compared against actual
+            size (which is only known in rare cases, mostly when directly
+            accessing a symbol since symbols have their destination object
+            size recorded)
+        """
+        if self._dstlen is not None and self._dstlen != reflen:
+            raise ValueError(
+                "symbol size mismatch (got %d, expected %d)" % (self._dstlen, reflen)
+            )
+        return self._dstsect[self._dstoffs : self._dstoffs + reflen]
+
+    def offset(self, offs, within_symbol=False):
+        """
+        Get another ELFData at an offset
+
+        :param offs:          byte offset, can be negative (e.g. in container_of)
+        :param within_symbol: retain length information
+        """
+        if self._dstlen is None or not within_symbol:
+            return ELFData(self._dstsect, self._dstoffs + offs, None)
+        else:
+            return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs)
+
+
+#
+# dissection data items
+#
+
+
+class ELFDissectData(object):
+    """
+    Shared base of ELFDissectStruct and ELFDissectUnion
+    """
+
+    def __init__(self):
+        self._data = None
+        self.elfclass = None
+
+    def __len__(self):
+        """
+        Truth value hook: false for NULL and unresolved pointers, so that
+        "if struct: ..." works
+        """
+        return not isinstance(self._data, (ELFNull, ELFUnresolved))
+
+    def container_of(self, parent, fieldname):
+        """
+        Treat this struct as a member of a bigger struct and build the latter
+
+        Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)``
+
+        :param parent:    class (not instance) of the enclosing struct
+        :param fieldname: name of the parent's field that holds this struct
+        :returns:         parent instance whose ``fieldname`` is this object
+        :raises AttributeError: parent has no field called ``fieldname``
+        """
+        if not hasattr(parent, "_efields"):
+            parent._setup_efields()
+        # pointer fields are as wide as the ELF class says
+        ptrspec = "I" if self.elfclass == 32 else "Q"
+
+        skipped = 0
+        for entry in parent._efields[self.elfclass]:
+            if entry[0] == fieldname:
+                break
+            # fields in front of ours push the parent's start further back
+            skipped += struct.calcsize(ptrspec if entry[1] == "P" else entry[1])
+        else:
+            raise AttributeError("%r not found in %r.fields" % (fieldname, parent))
+
+        return parent(self._data.offset(-skipped), replace={fieldname: self})
+
+
+class ELFDissectStruct(ELFDissectData):
+    """
+    Decode and provide access to a struct somewhere in the ELF file
+
+    Handles pointers and strings somewhat nicely.  Create a subclass for each
+    struct that is to be accessed, and give a field list in a "fields"
+    class-member.
+
+    :param dataptr: ELFData referring to the data bits to decode.
+    :param parent:  where this was instantiated from; only for reference, has
+        no functional impact.
+    :param replace: substitute data values for specific fields.  Used by
+        `container_of` to plug the inner struct into the outer one.
+
+    .. attribute:: fields
+
+       List of tuples describing the struct members.  Items can be:
+       - ``('name', ELFDissectData)`` - directly embed another struct
+       - ``('name', 'I')`` - simple data types; second item for struct.unpack
+       - ``('name', 'I', None)`` - same as without the third item
+       - ``(None, 'I')`` - field to ignore, no attribute is created
+       - ``('name', 'P', str)`` - pointer to string
+       - ``('name', 'P', ELFDissectData)`` - pointer to another struct
+
+       ``P`` is added as unpack format for pointers (sized appropriately for
+       the ELF file.)
+
+       Refer to tiabwarfo.py for extracting this from ``pahole``.
+
+       TBD: replace tuples with a class.
+
+    .. attribute:: fieldrename
+
+       Dictionary to rename fields, useful if fields comes from tiabwarfo.py.
+    """
+
+    class Pointer(object):
+        """
+        Quick wrapper for pointers to further structs
+
+        This is just here to avoid going into infinite loops when loading
+        structs that have pointers to each other (e.g. struct xref <-->
+        struct xrefdata.)  The pointer destination is only instantiated when
+        actually accessed.
+        """
+
+        def __init__(self, cls, ptr):
+            self.cls = cls
+            self.ptr = ptr
+
+        def __repr__(self):
+            return "<Pointer:%s %r>" % (self.cls.__name__, self.ptr)
+
+        def __call__(self):
+            if isinstance(self.ptr, ELFNull):
+                return None
+            return self.cls(self.ptr)
+
+    def __new__(cls, dataptr, parent=None, replace=None):
+        # NULL/unresolved pointers have no destination section to keep the
+        # per-section instance map (_pointers), so always build a new object
+        if dataptr._dstsect is None:
+            return super().__new__(cls)
+
+        obj = dataptr._dstsect._pointers.get((cls, dataptr))
+        if obj is not None:
+            return obj
+        obj = super().__new__(cls)
+        dataptr._dstsect._pointers[(cls, dataptr)] = obj
+        return obj
+
+    replacements = "lLnN"
+
+    @classmethod
+    def _preproc_structspec(cls, elfclass, spec):
+        """
+        Embedded structs become "<size>s"; l/L/n/N are sized per ELF class
+        """
+        if hasattr(spec, "calcsize"):
+            spec = "%ds" % (spec.calcsize(elfclass),)
+
+        if elfclass == 32:
+            repl = ["i", "I"]
+        else:
+            repl = ["q", "Q"]
+        for c in cls.replacements:
+            spec = spec.replace(c, repl[int(c.isupper())])
+        return spec
+
+    @classmethod
+    def _setup_efields(cls):
+        """Build per-ELF-class field lists (_efields) and sizes (_esize)."""
+        cls._efields = {}
+        cls._esize = {}
+        for elfclass in [32, 64]:
+            # "P" must follow the ELF file's pointer width, not the host's
+            ptrtype = "I" if elfclass == 32 else "Q"
+            cls._efields[elfclass] = []
+            size = 0
+            for f in cls.fields:
+                newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:]
+                cls._efields[elfclass].append(newf)
+                size += struct.calcsize(newf[1].replace("P", ptrtype))
+            cls._esize[elfclass] = size
+
+    def __init__(self, dataptr, parent=None, replace=None):
+        if not hasattr(self.__class__, "_efields"):
+            self._setup_efields()
+
+        self._fdata = None
+        self._data = dataptr
+        self._parent = parent
+        self.symname = dataptr.symname
+        if isinstance(dataptr, (ELFNull, ELFUnresolved)):
+            # nothing to decode behind a NULL or unresolved pointer
+            self._fdata = {}
+            return
+
+        self._elfsect = dataptr._dstsect
+        self.elfclass = self._elfsect._elffile.elfclass
+        self.offset = dataptr._dstoffs
+
+        pspecl = [f[1] for f in self._efields[self.elfclass]]
+
+        # need to correlate output from struct.unpack with extra metadata
+        # about the particular fields, so note down byte offsets (in locs)
+        # and tuple indices of pointers (in ptrs)
+        pspec = ""
+        locs = {}
+        ptrs = set()
+
+        for idx, spec in enumerate(pspecl):
+            if spec == "P":
+                ptrs.add(idx)
+                spec = self._elfsect.ptrtype
+            locs[idx] = struct.calcsize(pspec)
+            pspec = pspec + spec
+
+        self._total_size = struct.calcsize(pspec)
+
+        def replace_ptrs(v):
+            idx, val = v[0], v[1]
+            if idx not in ptrs:
+                return val
+            return self._elfsect.pointer(self.offset + locs[idx])
+
+        data = dataptr.get_data(self._total_size)
+        unpacked = struct.unpack(self._elfsect.endian + pspec, data)
+        unpacked = list(map(replace_ptrs, enumerate(unpacked)))
+        self._fraw = unpacked
+        self._fdata = OrderedDict()
+        replace = replace or {}
+
+        for i, item in enumerate(unpacked):
+            name = self.fields[i][0]
+            if name is None:
+                continue
+            if name in replace:
+                self._fdata[name] = replace[name]
+                continue
+
+            if isinstance(self.fields[i][1], type) and issubclass(
+                self.fields[i][1], ELFDissectData
+            ):
+                self._fdata[name] = self.fields[i][1](dataptr.offset(locs[i]), self)
+                continue
+            if len(self.fields[i]) == 3:
+                if self.fields[i][2] == str:
+                    self._fdata[name] = item.get_string()
+                    continue
+                elif self.fields[i][2] is None:
+                    pass
+                elif issubclass(self.fields[i][2], ELFDissectData):
+                    self._fdata[name] = self.Pointer(self.fields[i][2], item)
+                    continue
+
+            self._fdata[name] = item
+
+    def __getattr__(self, attrname):
+        # only reached when normal lookup fails; go through __dict__ so a
+        # missing/unset _fdata gives AttributeError, not endless recursion
+        fdata = self.__dict__.get("_fdata")
+        if not fdata or attrname not in fdata:
+            raise AttributeError(attrname)
+        if isinstance(fdata[attrname], self.Pointer):
+            fdata[attrname] = fdata[attrname]()
+        return fdata[attrname]
+
+    def __repr__(self):
+        if not isinstance(self._data, ELFData):
+            return "<%s: %r>" % (self.__class__.__name__, self._data)
+        return "<%s: %s>" % (
+            self.__class__.__name__,
+            ", ".join(["%s=%r" % t for t in self._fdata.items()]),
+        )
+
+    @classmethod
+    def calcsize(cls, elfclass):
+        """
+        Byte size of this struct for the given ELF class (32 or 64)
+        """
+        if not hasattr(cls, "_efields"):
+            cls._setup_efields()
+        pspec = "".join([f[1] for f in cls._efields[elfclass]])
+        # pointers are sized by the ELF class, not the host
+        ptrtype = "I" if elfclass == 32 else "Q"
+        return struct.calcsize(pspec.replace("P", ptrtype))
+
+
+class ELFDissectUnion(ELFDissectData):
+    """
+    Decode multiple structs in the same place.
+
+    Not currently used (and hence not tested.)  Worked at some point but not
+    needed anymore and may be borked now.  Remove this comment when using.
+    """
+
+    members = {}  # iterated as (name, ELFDissectData subclass) pairs
+
+    def __init__(self, dataptr, parent=None):
+        super().__init__()
+        # _data is what the inherited __len__ and container_of look at
+        self._data = self._dataptr = dataptr
+        self._parent = parent
+        self.members = []
+        for name, membercls in self.__class__.members:
+            item = membercls(dataptr, parent)
+            self.members.append(item)
+            setattr(self, name, item)
+
+    def __repr__(self):
+        inner = ", ".join([repr(i) for i in self.members])
+        return "<%s: %s>" % (self.__class__.__name__, inner)
+
+    @classmethod
+    def calcsize(cls, elfclass):
+        return max([member.calcsize(elfclass) for name, member in cls.members])
+
+
+#
+# wrappers for spans of ELF data
+#
+
+
+class ELFSubset(object):
+    """
+    Common abstract base for section-level and file-level access.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+        self.name = None
+        self._obj = None
+        self._elffile = None
+        self.ptrtype = None
+        self.endian = None
+        self._pointers = WeakValueDictionary()
+
+    def _wrap_data(self, data, dstsect):
+        raise NotImplementedError()  # implemented by the subclasses
+
+    def __hash__(self):
+        return hash(self.name)
+
+    def __getitem__(self, k):
+        """
+        Read data from slice
+
+        Subscript **must** be a slice; a simple index will not return a byte
+        but rather throw an exception.  Valid slice syntaxes are defined by
+        the C module:
+
+        - `this[123:456]` - extract specific range
+        - `this[123:str]` - extract until null byte.  The slice stop value is
+            the `str` type (or, technically, `unicode`.)
+        """
+        return self._obj[k]
+
+    def getreloc(self, offset):
+        """
+        Check for a relocation record at the specified offset.
+        """
+        return self._obj.getreloc(offset)
+
+    def iter_data(self, scls, slice_=slice(None)):
+        """
+        Assume an array of structs present at a particular slice and decode
+
+        :param scls:   ELFDissectData subclass for the struct
+        :param slice_: optional byte range; a negative stop counts back from
+            the end of the data, a missing/zero stop means "until the end"
+        """
+        size = scls.calcsize(self._elffile.elfclass)
+
+        offset = slice_.start or 0
+        stop = slice_.stop or self._obj.len
+        if stop < 0:
+            stop = self._obj.len + stop
+
+        while offset < stop:
+            yield scls(ELFData(self, offset, size))
+            offset += size
+
+    def pointer(self, offset):
+        """
+        Try to dereference a pointer value
+
+        This checks whether there's a relocation at the given offset and
+        uses that;  otherwise (e.g. in a non-PIE executable where the pointer
+        is already resolved by the linker) the data at the location is used.
+
+        :param offset: byte offset from beginning of section,
+            or virtual address in file
+        :returns:      ELFData wrapping pointed-to object
+        """
+        ptrsize = struct.calcsize(self.ptrtype)
+        data = struct.unpack(
+            self.endian + self.ptrtype, self[offset : offset + ptrsize]
+        )[0]
+
+        reloc = self.getreloc(offset)
+        dstsect = None
+        if reloc:
+            # section won't be available in whole-file operation
+            dstsect = reloc.getsection(data)
+            addend = reloc.r_addend
+
+            if reloc.relative:
+                # old-style ELF REL instead of RELA, not well-tested
+                addend += data
+
+            if reloc.unresolved and reloc.symvalid:
+                return ELFUnresolved(reloc.symname, addend)
+            elif reloc.symvalid:
+                data = addend + reloc.st_value
+            else:
+                data = addend
+
+        # 0 could technically be a valid pointer for a shared library,
+        # since libraries may use 0 as default virtual start address (it'll
+        # be adjusted on loading)
+        # That said, if the library starts at 0, that's where the ELF header
+        # would be so it's still an invalid pointer.
+        if data == 0 and dstsect is None:
+            return ELFNull()
+
+        # wrap_data is different between file & section
+        return self._wrap_data(data, dstsect)
+
+
+class ELFDissectSection(ELFSubset):
+    """
+    View onto a single ELF section, e.g. ``.text`` or ``.data``
+
+    :param elfwrap: the ELFDissectFile this section belongs to
+    :param idx:     index of the section in the section header table
+    :param section: the C module's object for the section
+    """
+
+    def __init__(self, elfwrap, idx, section):
+        super().__init__()
+
+        self._idx = idx
+        self._section = self._obj = section
+        self.name = section.name
+        self._elfwrap = elfwrap
+        self._elffile = elfwrap._elffile
+        # pointer format and byte order are taken over from elfwrap
+        self.ptrtype, self.endian = elfwrap.ptrtype, elfwrap.endian
+
+    def _wrap_data(self, data, dstsect):
+        target = dstsect
+        if target is None:
+            target = self._elfwrap._elffile.get_section_addr(data)
+        rel = data - target.sh_addr
+        return ELFData(self._elfwrap.get_section(target.idx), rel, None)
+
+
+class ELFDissectFile(ELFSubset):
+    """
+    Access the contents of an ELF file.
+
+    Note that offsets for array subscript and relocation/pointer access are
+    based on the file's virtual address space and are NOT offsets to the
+    start of the file on disk!
+
+    (Shared libraries frequently have a virtual address space starting at 0,
+    but non-PIE executables have an architecture specific default loading
+    address like 0x400000 on x86.)
+
+    :param filename: ELF file to open
+    """
+
+    def __init__(self, filename):
+        super().__init__()
+
+        self.name = filename
+        self._sections = {}  # section index -> ELFDissectSection
+        self._elffile = self._obj = ELFFile(filename)
+
+        self.endian = ">" if self._elffile.bigendian else "<"
+        self.ptrtype = "I" if self._elffile.elfclass == 32 else "Q"
+
+    @property
+    def _elfwrap(self):
+        return self
+
+    def _wrap_data(self, data, dstsect):
+        return ELFData(self, data, None)
+
+    def get_section(self, secname):
+        """
+        Fetch the wrapper for a section, given either its name or its index
+
+        Wrappers are created on first use and then reused; the result is None
+        if the underlying lookup comes back with None.
+        """
+        if isinstance(secname, int):
+            lookup = self._elffile.get_section_idx
+        else:
+            lookup = self._elffile.get_section
+        raw = lookup(secname)
+        if raw is None:
+            return None
+        idx = raw.idx
+        wrapped = self._sections.get(idx)
+        if wrapped is None:
+            wrapped = self._sections[idx] = ELFDissectSection(self, idx, raw)
+        return wrapped
diff --git a/python/clippy/uidhash.py b/python/clippy/uidhash.py
new file mode 100644
index 0000000..73570b2
--- /dev/null
+++ b/python/clippy/uidhash.py
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# xref unique ID hash calculation
+#
+# Copyright (C) 2020  David Lamparter for NetDEF, Inc.
+
+import struct
+from hashlib import sha256
+
+
+def bititer(data, bits, startbit=True):
+    """
+    Generator that chops a list of byte values into <bits>-wide integers,
+    least significant bits first.  The list is consumed (emptied) on the way.
+
+    With startbit set, the first value carries only bits-1 bits of data and
+    has its top bit forced to 1.  Bits left over at the end are not yielded.
+    """
+    mask = (1 << bits) - 1
+    pending, acc = 0, 0
+    if startbit and data:
+        acc = data.pop(0)
+        yield (acc & mask) | (1 << (bits - 1))
+        pending, acc = 9 - bits, acc >> (bits - 1)
+
+    while data:
+        while pending < bits:
+            acc |= data.pop(0) << pending
+            pending += 8
+        yield acc & mask
+        pending, acc = pending - bits, acc >> bits
+
+
+def base32c(data):
+    """
+    Crockford base32 digits for the first 10 groups of data, as XXXXX-XXXXX
+    """
+    alphabet = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
+    if type(data) == str:
+        octets = [ord(ch) for ch in data]
+    else:
+        octets = list(data)
+    digits = []
+    for pos, group in enumerate(bititer(octets, 5)):
+        if pos == 10:
+            break
+        if pos == 5:
+            digits.append("-")
+        digits.append(alphabet[group])
+    return "".join(digits)
+
+
+def uidhash(filename, hashstr, hashu32a, hashu32b):
+    """
+    Compute the xref Unique ID hash used in FRRouting
+
+    Only the last two path components of filename go into the hash.
+    """
+    tail = "/".join(filename.rsplit("/")[-2:])
+    hasher = sha256(tail.encode("UTF-8") + hashstr.encode("UTF-8"))
+    hasher.update(struct.pack(">II", hashu32a, hashu32b))
+    return base32c(hasher.digest())