diff options
Diffstat (limited to '')
-rw-r--r-- | lib/elf_py.c | 1385 |
1 files changed, 1385 insertions, 0 deletions
diff --git a/lib/elf_py.c b/lib/elf_py.c new file mode 100644 index 0000000..7c503cf --- /dev/null +++ b/lib/elf_py.c @@ -0,0 +1,1385 @@ +/* + * fast ELF file accessor + * Copyright (C) 2018-2020 David Lamparter for NetDEF, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Note: this wrapper is intended to be used as build-time helper. While + * it should be generally correct and proper, there may be the occasional + * memory leak or SEGV for things that haven't been well-tested. + * _ + * / \ This code is NOT SUITABLE FOR UNTRUSTED ELF FILES. It's used + * / ! \ in FRR to read files created by its own build. Don't take it out + * /_____\ of FRR and use it to parse random ELF files you found somewhere. + * + * If you're working with this code (or even reading it), you really need to + * read a bunch of the ELF specs. There's no way around it, things in here + * just represent pieces of ELF pretty much 1:1. Also, readelf & objdump are + * your friends. 
+ * + * Required reading: + * https://refspecs.linuxfoundation.org/elf/elf.pdf + * https://refspecs.linuxfoundation.org/elf/x86_64-SysV-psABI.pdf + * Recommended reading: + * https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aaelf64.pdf + * + * The core ELF spec is *not* enough, you should read at least one of the + * processor specific (psABI) docs. They define what & how relocations work. + * Luckily we don't need to care about the processor specifics since this only + * does data relocations, but without looking at the psABI, some things aren't + * quite clear. + */ + +/* the API of this module roughly follows a very small subset of the one + * provided by the python elfutils package, which unfortunately is painfully + * slow. + */ + +#define PY_SSIZE_T_CLEAN + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include <Python.h> +#include "structmember.h" +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> + +#if defined(__sun__) && (__SIZEOF_POINTER__ == 4) +/* Solaris libelf bails otherwise ... */ +#undef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 32 +#endif + +#include <elf.h> +#include <libelf.h> +#include <gelf.h> + +#include "typesafe.h" +#include "jhash.h" +#include "clippy.h" + +static bool debug; + +#define debugf(...) \ + do { \ + if (debug) \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) + +/* Exceptions */ +static PyObject *ELFFormatError; +static PyObject *ELFAccessError; + +/* most objects can only be created as return values from one of the methods */ +static PyObject *refuse_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyErr_SetString(PyExc_ValueError, + "cannot create instances of this type"); + return NULL; +} + +struct elfreloc; +struct elfsect; + +PREDECL_HASH(elfrelocs); + +/* ELFFile and ELFSection intentionally share some behaviour, particularly + * subscript[123:456] access to file data. 
/* class ELFFile:
 *
 * overall entry point, instantiated by reading in an ELF file
 *
 * All fields are filled in by elffile_load(); the mapping, the libelf handle
 * and the filename copy are released again in elffile_free().
 */
struct elffile {
	PyObject_HEAD

	/* strdup()'d copy of the path given to the constructor */
	char *filename;
	/* read-only mapping of the whole file; mmend == mmap + len */
	char *mmap, *mmend;
	/* size of the file (and of the mapping) in bytes */
	size_t len;
	/* libelf handle created with elf_memory() on top of the mapping */
	Elf *elf;

	/* note from here on there are several instances of
	 *
	 *   GElf_Something *x, _x;
	 *
	 * this is a pattern used by libelf's generic ELF routines; the _x
	 * field is used to create a copy of the ELF structure from the file
	 * with 32/64bit and endianness adjusted.
	 */

	GElf_Ehdr *ehdr, _ehdr;
	/* first SHT_SYMTAB section found while loading, NULL if none */
	Elf_Scn *symtab;
	/* nsym: sh_size / sh_entsize of that section;
	 * symstridx: its sh_link, i.e. the section index of its string table
	 */
	size_t nsym, symstridx;
	/* elf_getdata() of the symbol table section */
	Elf_Data *symdata;

	/* n_sect (== e_shnum) slots of ELFSection wrappers; a slot stays NULL
	 * until elffile_secbyidx() is first asked for that index
	 */
	PyObject **sects;
	size_t n_sect;

	/* relocations collected from PT_DYNAMIC (DT_RELA / DT_REL), hashed by
	 * r_offset; looked up through ELFFile.getreloc()
	 */
	struct elfrelocs_head dynrelocs;

	/* 32 or 64, derived from EI_CLASS */
	int elfclass;
	/* true if EI_DATA is ELFDATA2MSB */
	bool bigendian;
	/* true if an SHT_SYMTAB section with a nonzero sh_link was found */
	bool has_symbols;
};
struct elfreloc {
	PyObject_HEAD

	/* linkage for the elfrelocs hash table (see DECLARE_HASH); entries
	 * are keyed on rela->r_offset by elfreloc_cmp() / elfreloc_hash()
	 */
	struct elfrelocs_item elfrelocs_item;

	/* es is set for section-based relocations created by
	 * elfsect_add_relocations(), ef for dynamic relocations created by
	 * elffile_add_dynreloc(); neither function assigns the other field
	 */
	struct elfsect *es;
	struct elffile *ef;

	/* there's also old-fashioned GElf_Rel; we're converting that to
	 * GElf_Rela in elfsect_add_relocations()
	 */
	GElf_Rela _rela, *rela;
	/* sym is NULL when gelf_getsym() could not return the symbol */
	GElf_Sym _sym, *sym;
	/* symbol table index taken from r_info via GELF_R_SYM() */
	size_t symidx;
	/* points into the ELF string table data, NULL if there is no symbol */
	const char *symname;

	/* documented below in python docstrings */
	bool symvalid, unresolved, relative;
	unsigned long long st_value;
};
not STT_NOTYPE"), + member(unresolved, T_BOOL, + "Target symbol refers to an existing section"), + member(relative, T_BOOL, + "Relocation is a REL (not RELA) record and thus relative."), + member(st_value, T_ULONGLONG, + "Target symbol's value, if known\n\n" + "Will be zero for unresolved/external symbols."), + {} +}; +#undef member + +static PyGetSetDef getset_elfreloc[] = { + { .name = (char *)"r_addend", .get = elfreloc_getaddend, .doc = + (char *)"Relocation addend value"}, + {} +}; + +static PyMethodDef methods_elfreloc[] = { + {"getsection", elfreloc_getsection, METH_VARARGS, + "Find relocation target's ELF section\n\n" + "Args: address of relocatee (TODO: fix/remove?)\n" + "Returns: ELFSection or None\n\n" + "Not possible if section headers have been stripped."}, + {} +}; + +static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b) +{ + if (a->rela->r_offset < b->rela->r_offset) + return -1; + if (a->rela->r_offset > b->rela->r_offset) + return 1; + return 0; +} + +static uint32_t elfreloc_hash(const struct elfreloc *reloc) +{ + return jhash(&reloc->rela->r_offset, sizeof(reloc->rela->r_offset), + 0xc9a2b7f4); +} + +static struct elfreloc *elfrelocs_get(struct elfrelocs_head *head, + GElf_Addr offset) +{ + struct elfreloc dummy; + + dummy.rela = &dummy._rela; + dummy.rela->r_offset = offset; + return elfrelocs_find(head, &dummy); +} + +static PyObject *elfreloc_getsection(PyObject *self, PyObject *args) +{ + struct elfreloc *w = (struct elfreloc *)self; + long data; + + if (!PyArg_ParseTuple(args, "k", &data)) + return NULL; + + if (!w->es) + Py_RETURN_NONE; + + if (!w->symvalid || w->symidx == 0) { + size_t idx = 0; + Elf_Scn *scn; + + data = (w->relative ? 
data : 0) + w->rela->r_addend; + scn = elf_find_addr(w->es->ef, data, &idx); + if (!scn) + Py_RETURN_NONE; + return elffile_secbyidx(w->es->ef, scn, idx); + } + return elffile_secbyidx(w->es->ef, NULL, w->sym->st_shndx); +} + +static PyObject *elfreloc_getaddend(PyObject *obj, void *closure) +{ + struct elfreloc *w = (struct elfreloc *)obj; + + return Py_BuildValue("K", (unsigned long long)w->rela->r_addend); +} + +static PyObject *elfreloc_repr(PyObject *arg) +{ + struct elfreloc *w = (struct elfreloc *)arg; + + return PyUnicode_FromFormat("<ELFReloc @%lu %s+%lu>", + (unsigned long)w->rela->r_offset, + (w->symname && w->symname[0]) ? w->symname + : "[0]", + (unsigned long)w->rela->r_addend); +} + +static void elfreloc_free(void *arg) +{ + struct elfreloc *w = arg; + + (void)w; +} + +static PyTypeObject typeobj_elfreloc = { + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFReloc", + .tp_basicsize = sizeof(struct elfreloc), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = elfreloc_doc, + .tp_new = refuse_new, + .tp_free = elfreloc_free, + .tp_repr = elfreloc_repr, + .tp_members = members_elfreloc, + .tp_methods = methods_elfreloc, + .tp_getset = getset_elfreloc, +}; + +/* + * class ELFSection: + */ + +static const char elfsect_doc[] = + "Represents an ELF section\n" + "\n" + "To access section contents, use subscript notation, e.g.\n" + " section[123:456]\n" + "To read null terminated C strings, replace the end with str:\n" + " section[123:str]\n\n" + "(struct elfsect * in elf_py.c)"; + +static PyObject *elfsect_getaddr(PyObject *self, void *closure); + +#define member(name, type, doc) \ + { \ + (char *)#name, type, offsetof(struct elfsect, name), READONLY, \ + (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \ + } +static PyMemberDef members_elfsect[] = { + member(name, T_STRING, + "Section name, e.g. 
\".text\""), + member(idx, T_ULONG, + "Section index in file"), + member(len, T_ULONG, + "Section length in bytes"), + {}, +}; +#undef member + +static PyGetSetDef getset_elfsect[] = { + { .name = (char *)"sh_addr", .get = elfsect_getaddr, .doc = + (char *)"Section virtual address (mapped program view)"}, + {} +}; + +static PyObject *elfsect_getaddr(PyObject *self, void *closure) +{ + struct elfsect *w = (struct elfsect *)self; + + return Py_BuildValue("K", (unsigned long long)w->shdr->sh_addr); +} + + +static PyObject *elfsect_getreloc(PyObject *self, PyObject *args) +{ + struct elfsect *w = (struct elfsect *)self; + struct elfreloc *relw; + unsigned long offs; + PyObject *ret; + + if (!PyArg_ParseTuple(args, "k", &offs)) + return NULL; + + relw = elfrelocs_get(&w->relocs, offs + w->shdr->sh_addr); + if (!relw) + Py_RETURN_NONE; + + ret = (PyObject *)relw; + Py_INCREF(ret); + return ret; +} + +static PyMethodDef methods_elfsect[] = { + {"getreloc", elfsect_getreloc, METH_VARARGS, + "Check for / get relocation at offset into section\n\n" + "Args: byte offset into section to check\n" + "Returns: ELFReloc or None"}, + {} +}; + +static PyObject *elfsect_subscript(PyObject *self, PyObject *key) +{ + Py_ssize_t start, stop, step, sllen; + struct elfsect *w = (struct elfsect *)self; + PySliceObject *slice; + unsigned long offs, len = ~0UL; + + if (!PySlice_Check(key)) { + PyErr_SetString(PyExc_IndexError, + "ELFSection subscript must be slice"); + return NULL; + } + slice = (PySliceObject *)key; + if (PyLong_Check(slice->stop)) { + if (PySlice_GetIndicesEx(key, w->shdr->sh_size, + &start, &stop, &step, &sllen)) + return NULL; + + if (step != 1) { + PyErr_SetString(PyExc_IndexError, + "ELFSection subscript slice step must be 1"); + return NULL; + } + if ((GElf_Xword)stop > w->shdr->sh_size) { + PyErr_Format(ELFAccessError, + "access (%lu) beyond end of section %lu/%s (%lu)", + stop, w->idx, w->name, w->shdr->sh_size); + return NULL; + } + + offs = start; + len = sllen; + 
} else { + if (slice->stop != (void *)&PyUnicode_Type + || !PyLong_Check(slice->start)) { + PyErr_SetString(PyExc_IndexError, "invalid slice"); + return NULL; + } + + offs = PyLong_AsUnsignedLongLong(slice->start); + len = ~0UL; + } + + offs += w->shdr->sh_offset; + if (offs > w->ef->len) { + PyErr_Format(ELFAccessError, + "access (%lu) beyond end of file (%lu)", + offs, w->ef->len); + return NULL; + } + if (len == ~0UL) + len = strnlen(w->ef->mmap + offs, w->ef->len - offs); + + Py_ssize_t pylen = len; + +#if PY_MAJOR_VERSION >= 3 + return Py_BuildValue("y#", w->ef->mmap + offs, pylen); +#else + return Py_BuildValue("s#", w->ef->mmap + offs, pylen); +#endif +} + +static PyMappingMethods mp_elfsect = { + .mp_subscript = elfsect_subscript, +}; + +static void elfsect_free(void *arg) +{ + struct elfsect *w = arg; + + (void)w; +} + +static PyObject *elfsect_repr(PyObject *arg) +{ + struct elfsect *w = (struct elfsect *)arg; + + return PyUnicode_FromFormat("<ELFSection %s>", w->name); +} + +static PyTypeObject typeobj_elfsect = { + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFSection", + .tp_basicsize = sizeof(struct elfsect), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = elfsect_doc, + .tp_new = refuse_new, + .tp_free = elfsect_free, + .tp_repr = elfsect_repr, + .tp_as_mapping = &mp_elfsect, + .tp_members = members_elfsect, + .tp_methods = methods_elfsect, + .tp_getset = getset_elfsect, +}; + +static void elfsect_add_relocations(struct elfsect *w, Elf_Scn *rel, + GElf_Shdr *relhdr) +{ + size_t i, entries; + Elf_Scn *symtab = elf_getscn(w->ef->elf, relhdr->sh_link); + GElf_Shdr _symhdr, *symhdr = gelf_getshdr(symtab, &_symhdr); + Elf_Data *symdata = elf_getdata(symtab, NULL); + Elf_Data *reldata = elf_getdata(rel, NULL); + + entries = relhdr->sh_size / relhdr->sh_entsize; + for (i = 0; i < entries; i++) { + struct elfreloc *relw; + size_t symidx; + GElf_Rela *rela; + GElf_Sym *sym; + + relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc( + &typeobj_elfreloc, 0); 
+ relw->es = w; + + if (relhdr->sh_type == SHT_REL) { + GElf_Rel _rel, *rel; + + rel = gelf_getrel(reldata, i, &_rel); + relw->rela = &relw->_rela; + relw->rela->r_offset = rel->r_offset; + relw->rela->r_info = rel->r_info; + relw->rela->r_addend = 0; + relw->relative = true; + } else + relw->rela = gelf_getrela(reldata, i, &relw->_rela); + + rela = relw->rela; + if (rela->r_offset < w->shdr->sh_addr + || rela->r_offset >= w->shdr->sh_addr + w->shdr->sh_size) + continue; + + symidx = relw->symidx = GELF_R_SYM(rela->r_info); + sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym); + if (sym) { + relw->symname = elf_strptr(w->ef->elf, symhdr->sh_link, + sym->st_name); + relw->symvalid = GELF_ST_TYPE(sym->st_info) + != STT_NOTYPE; + relw->unresolved = sym->st_shndx == SHN_UNDEF; + relw->st_value = sym->st_value; + } else { + relw->symname = NULL; + relw->symvalid = false; + relw->unresolved = false; + relw->st_value = 0; + } + + debugf("reloc @ %016llx sym %5llu %016llx %s\n", + (long long)rela->r_offset, (unsigned long long)symidx, + (long long)rela->r_addend, relw->symname); + + elfrelocs_add(&w->relocs, relw); + } +} + +/* + * bindings & loading code between ELFFile and ELFSection + */ + +static PyObject *elfsect_wrap(struct elffile *ef, Elf_Scn *scn, size_t idx, + const char *name) +{ + struct elfsect *w; + size_t i; + + w = (struct elfsect *)typeobj_elfsect.tp_alloc(&typeobj_elfsect, 0); + if (!w) + return NULL; + + w->name = name; + w->ef = ef; + w->scn = scn; + w->shdr = gelf_getshdr(scn, &w->_shdr); + w->len = w->shdr->sh_size; + w->idx = idx; + elfrelocs_init(&w->relocs); + + for (i = 0; i < ef->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(ef->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + if (shdr->sh_type != SHT_RELA && shdr->sh_type != SHT_REL) + continue; + if (shdr->sh_info && shdr->sh_info != idx) + continue; + elfsect_add_relocations(w, scn, shdr); + } + + return (PyObject *)w; +} + +static Elf_Scn 
*elf_find_section(struct elffile *ef, const char *name, + size_t *idx) +{ + size_t i; + const char *secname; + + for (i = 0; i < ef->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(ef->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + secname = elf_strptr(ef->elf, ef->ehdr->e_shstrndx, + shdr->sh_name); + if (strcmp(secname, name)) + continue; + if (idx) + *idx = i; + return scn; + } + return NULL; +} + +static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx) +{ + size_t i; + + for (i = 0; i < ef->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(ef->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + /* virtual address is kinda meaningless for TLS sections */ + if (shdr->sh_flags & SHF_TLS) + continue; + if (addr < shdr->sh_addr || + addr >= shdr->sh_addr + shdr->sh_size) + continue; + + if (idx) + *idx = i; + return scn; + } + return NULL; +} + +/* + * class ELFFile: + */ + +static const char elffile_doc[] = + "Represents an ELF file\n" + "\n" + "Args: filename to load\n" + "\n" + "To access raw file contents, use subscript notation, e.g.\n" + " file[123:456]\n" + "To read null terminated C strings, replace the end with str:\n" + " file[123:str]\n\n" + "(struct elffile * in elf_py.c)"; + + +#define member(name, type, doc) \ + { \ + (char *)#name, type, offsetof(struct elffile, name), READONLY, \ + (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \ + } +static PyMemberDef members_elffile[] = { + member(filename, T_STRING, + "Original file name as given when opening"), + member(elfclass, T_INT, + "ELF class (architecture bit size)\n\n" + "Either 32 or 64, straight integer."), + member(bigendian, T_BOOL, + "ELF file is big-endian\n\n" + "All internal ELF structures are automatically converted."), + member(has_symbols, T_BOOL, + "A symbol section is present\n\n" + "Note: only refers to .symtab/SHT_SYMTAB section, not DT_SYMTAB" + ), + {}, +}; +#undef member + +static PyObject *elffile_secbyidx(struct 
elffile *w, Elf_Scn *scn, size_t idx) +{ + const char *name; + PyObject *ret; + + if (!scn) + scn = elf_getscn(w->elf, idx); + if (!scn || idx >= w->n_sect) + Py_RETURN_NONE; + + if (!w->sects[idx]) { + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + name = elf_strptr(w->elf, w->ehdr->e_shstrndx, shdr->sh_name); + w->sects[idx] = elfsect_wrap(w, scn, idx, name); + } + + ret = w->sects[idx]; + Py_INCREF(ret); + return ret; +} + +static PyObject *elffile_get_section(PyObject *self, PyObject *args) +{ + const char *name; + struct elffile *w = (struct elffile *)self; + Elf_Scn *scn; + size_t idx = 0; + + if (!PyArg_ParseTuple(args, "s", &name)) + return NULL; + + scn = elf_find_section(w, name, &idx); + return elffile_secbyidx(w, scn, idx); +} + +static PyObject *elffile_get_section_addr(PyObject *self, PyObject *args) +{ + unsigned long long addr; + struct elffile *w = (struct elffile *)self; + Elf_Scn *scn; + size_t idx = 0; + + if (!PyArg_ParseTuple(args, "K", &addr)) + return NULL; + + scn = elf_find_addr(w, addr, &idx); + return elffile_secbyidx(w, scn, idx); +} + +static PyObject *elffile_get_section_idx(PyObject *self, PyObject *args) +{ + unsigned long long idx; + struct elffile *w = (struct elffile *)self; + + if (!PyArg_ParseTuple(args, "K", &idx)) + return NULL; + + return elffile_secbyidx(w, NULL, idx); +} + +static PyObject *elffile_get_symbol(PyObject *self, PyObject *args) +{ + const char *name, *symname; + struct elffile *w = (struct elffile *)self; + GElf_Sym _sym, *sym; + size_t i; + + if (!PyArg_ParseTuple(args, "s", &name)) + return NULL; + + for (i = 0; i < w->nsym; i++) { + sym = gelf_getsym(w->symdata, i, &_sym); + if (sym->st_name == 0) + continue; + symname = elf_strptr(w->elf, w->symstridx, sym->st_name); + if (strcmp(symname, name)) + continue; + + PyObject *pysect; + Elf_Scn *scn = elf_getscn(w->elf, sym->st_shndx); + + if (scn) + pysect = elffile_secbyidx(w, scn, sym->st_shndx); + else { + pysect = Py_None; + Py_INCREF(pysect); + } 
+ return Py_BuildValue("sKN", symname, + (unsigned long long)sym->st_value, pysect); + } + Py_RETURN_NONE; +} + +static PyObject *elffile_getreloc(PyObject *self, PyObject *args) +{ + struct elffile *w = (struct elffile *)self; + struct elfreloc *relw; + unsigned long offs; + PyObject *ret; + + if (!PyArg_ParseTuple(args, "k", &offs)) + return NULL; + + relw = elfrelocs_get(&w->dynrelocs, offs); + if (!relw) + Py_RETURN_NONE; + + ret = (PyObject *)relw; + Py_INCREF(ret); + return ret; +} + +static PyObject *elffile_find_note(PyObject *self, PyObject *args) +{ +#if defined(HAVE_GELF_GETNOTE) && defined(HAVE_ELF_GETDATA_RAWCHUNK) + const char *owner; + const uint8_t *ids; + GElf_Word id; + struct elffile *w = (struct elffile *)self; + size_t i; + + if (!PyArg_ParseTuple(args, "ss", &owner, &ids)) + return NULL; + + if (strlen((char *)ids) != 4) { + PyErr_SetString(PyExc_ValueError, + "ELF note ID must be exactly 4-byte string"); + return NULL; + } + if (w->bigendian) + id = (ids[0] << 24) | (ids[1] << 16) | (ids[2] << 8) | ids[3]; + else + id = (ids[3] << 24) | (ids[2] << 16) | (ids[1] << 8) | ids[0]; + + for (i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + Elf_Data *notedata; + size_t offset; + + if (phdr->p_type != PT_NOTE) + continue; + + notedata = elf_getdata_rawchunk(w->elf, phdr->p_offset, + phdr->p_filesz, ELF_T_NHDR); + + GElf_Nhdr nhdr[1]; + size_t nameoffs, dataoffs; + + offset = 0; + while ((offset = gelf_getnote(notedata, offset, nhdr, + &nameoffs, &dataoffs))) { + if (phdr->p_offset + nameoffs >= w->len) + continue; + + const char *name = w->mmap + phdr->p_offset + nameoffs; + + if (strcmp(name, owner)) + continue; + if (id != nhdr->n_type) + continue; + + PyObject *s, *e; + + s = PyLong_FromUnsignedLongLong( + phdr->p_vaddr + dataoffs); + e = PyLong_FromUnsignedLongLong( + phdr->p_vaddr + dataoffs + nhdr->n_descsz); + return PySlice_New(s, e, NULL); + } + } +#endif + Py_RETURN_NONE; +} + +#ifdef 
HAVE_ELF_GETDATA_RAWCHUNK +static bool elffile_virt2file(struct elffile *w, GElf_Addr virt, + GElf_Addr *offs) +{ + *offs = 0; + + for (size_t i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + + if (phdr->p_type != PT_LOAD) + continue; + + if (virt < phdr->p_vaddr + || virt >= phdr->p_vaddr + phdr->p_memsz) + continue; + + if (virt >= phdr->p_vaddr + phdr->p_filesz) + return false; + + *offs = virt - phdr->p_vaddr + phdr->p_offset; + return true; + } + + return false; +} +#endif /* HAVE_ELF_GETDATA_RAWCHUNK */ + +static PyObject *elffile_subscript(PyObject *self, PyObject *key) +{ + Py_ssize_t start, stop, step; + PySliceObject *slice; + struct elffile *w = (struct elffile *)self; + bool str = false; + + if (!PySlice_Check(key)) { + PyErr_SetString(PyExc_IndexError, + "ELFFile subscript must be slice"); + return NULL; + } + slice = (PySliceObject *)key; + stop = -1; + step = 1; + if (PyLong_Check(slice->stop)) { + start = PyLong_AsSsize_t(slice->start); + if (PyErr_Occurred()) + return NULL; + if (slice->stop != Py_None) { + stop = PyLong_AsSsize_t(slice->stop); + if (PyErr_Occurred()) + return NULL; + } + if (slice->step != Py_None) { + step = PyLong_AsSsize_t(slice->step); + if (PyErr_Occurred()) + return NULL; + } + } else { + if (slice->stop != (void *)&PyUnicode_Type + || !PyLong_Check(slice->start)) { + PyErr_SetString(PyExc_IndexError, "invalid slice"); + return NULL; + } + + str = true; + start = PyLong_AsUnsignedLongLong(slice->start); + } + if (step != 1) { + PyErr_SetString(PyExc_IndexError, + "ELFFile subscript slice step must be 1"); + return NULL; + } + + GElf_Addr xstart = start, xstop = stop; + + for (size_t i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + + if (phdr->p_type != PT_LOAD) + continue; + + if (xstart < phdr->p_vaddr + || xstart >= phdr->p_vaddr + phdr->p_memsz) + continue; + if (!str && (xstop < phdr->p_vaddr + || xstop > phdr->p_vaddr + 
phdr->p_memsz)) { + PyErr_Format(ELFAccessError, + "access (%llu) beyond end of program header (%llu)", + (long long)xstop, + (long long)(phdr->p_vaddr + + phdr->p_memsz)); + return NULL; + } + + xstart = xstart - phdr->p_vaddr + phdr->p_offset; + + if (str) + xstop = strlen(w->mmap + xstart); + else + xstop = xstop - phdr->p_vaddr + phdr->p_offset; + + Py_ssize_t pylen = xstop - xstart; + +#if PY_MAJOR_VERSION >= 3 + return Py_BuildValue("y#", w->mmap + xstart, pylen); +#else + return Py_BuildValue("s#", w->mmap + xstart, pylen); +#endif + }; + + return PyErr_Format(ELFAccessError, + "virtual address (%llu) not found in program headers", + (long long)start); +} + +static PyMethodDef methods_elffile[] = { + {"find_note", elffile_find_note, METH_VARARGS, + "find specific note entry"}, + {"getreloc", elffile_getreloc, METH_VARARGS, + "find relocation"}, + {"get_symbol", elffile_get_symbol, METH_VARARGS, + "find symbol by name"}, + {"get_section", elffile_get_section, METH_VARARGS, + "find section by name"}, + {"get_section_addr", elffile_get_section_addr, METH_VARARGS, + "find section by address"}, + {"get_section_idx", elffile_get_section_idx, METH_VARARGS, + "find section by index"}, + {} +}; + +static PyObject *elffile_load(PyTypeObject *type, PyObject *args, + PyObject *kwds); + +static void elffile_free(void *arg) +{ + struct elffile *w = arg; + + elf_end(w->elf); + munmap(w->mmap, w->len); + free(w->filename); +} + +static PyMappingMethods mp_elffile = { + .mp_subscript = elffile_subscript, +}; + +static PyTypeObject typeobj_elffile = { + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFFile", + .tp_basicsize = sizeof(struct elffile), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = elffile_doc, + .tp_new = elffile_load, + .tp_free = elffile_free, + .tp_as_mapping = &mp_elffile, + .tp_members = members_elffile, + .tp_methods = methods_elffile, +}; + +#ifdef HAVE_ELF_GETDATA_RAWCHUNK +static char *elfdata_strptr(Elf_Data *data, size_t offset) +{ + char *p; + + 
if (offset >= data->d_size) + return NULL; + + p = (char *)data->d_buf + offset; + if (strnlen(p, data->d_size - offset) >= data->d_size - offset) + return NULL; + + return p; +} + +static void elffile_add_dynreloc(struct elffile *w, Elf_Data *reldata, + size_t entries, Elf_Data *symdata, + Elf_Data *strdata, Elf_Type typ) +{ + size_t i; + + for (i = 0; i < entries; i++) { + struct elfreloc *relw; + size_t symidx; + GElf_Rela *rela; + GElf_Sym *sym; + GElf_Addr rel_offs = 0; + + relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc( + &typeobj_elfreloc, 0); + relw->ef = w; + + if (typ == ELF_T_REL) { + GElf_Rel _rel, *rel; + GElf_Addr offs; + + rel = gelf_getrel(reldata, i, &_rel); + relw->rela = &relw->_rela; + relw->rela->r_offset = rel->r_offset; + relw->rela->r_info = rel->r_info; + relw->rela->r_addend = 0; + relw->relative = true; + + /* REL uses the pointer contents itself instead of the + * RELA addend field :( ... theoretically this could + * be some weird platform specific encoding, but since + * we only care about data relocations it should + * always be a pointer... + */ + if (elffile_virt2file(w, rel->r_offset, &offs)) { + Elf_Data *ptr; + + /* NB: this endian-converts! */ + ptr = elf_getdata_rawchunk(w->elf, offs, + w->elfclass / 8, + ELF_T_ADDR); + + if (ptr) { + char *dst = (char *)&rel_offs; + + /* sigh. it endian-converts. but + * doesn't size-convert. 
+ */ + if (BYTE_ORDER == BIG_ENDIAN && + ptr->d_size < sizeof(rel_offs)) + dst += sizeof(rel_offs) - + ptr->d_size; + + memcpy(dst, ptr->d_buf, ptr->d_size); + + relw->relative = false; + relw->rela->r_addend = rel_offs; + } + } + } else + relw->rela = gelf_getrela(reldata, i, &relw->_rela); + + rela = relw->rela; + symidx = relw->symidx = GELF_R_SYM(rela->r_info); + sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym); + if (sym) { + relw->symname = elfdata_strptr(strdata, sym->st_name); + relw->symvalid = GELF_ST_TYPE(sym->st_info) + != STT_NOTYPE; + relw->unresolved = sym->st_shndx == SHN_UNDEF; + relw->st_value = sym->st_value; + } else { + relw->symname = NULL; + relw->symvalid = false; + relw->unresolved = false; + relw->st_value = 0; + } + + if (typ == ELF_T_RELA) + debugf("dynrela @ %016llx sym %5llu %016llx %s\n", + (long long)rela->r_offset, + (unsigned long long)symidx, + (long long)rela->r_addend, relw->symname); + else + debugf("dynrel @ %016llx sym %5llu (%016llx) %s\n", + (long long)rela->r_offset, + (unsigned long long)symidx, + (unsigned long long)rel_offs, relw->symname); + + elfrelocs_add(&w->dynrelocs, relw); + } + +} +#endif /* HAVE_ELF_GETDATA_RAWCHUNK */ + +/* primary (only, really) entry point to anything in this module */ +static PyObject *elffile_load(PyTypeObject *type, PyObject *args, + PyObject *kwds) +{ + const char *filename; + static const char * const kwnames[] = {"filename", NULL}; + struct elffile *w; + struct stat st; + int fd, err; + + w = (struct elffile *)typeobj_elffile.tp_alloc(&typeobj_elffile, 0); + if (!w) + return NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", (char **)kwnames, + &filename)) + return NULL; + + w->filename = strdup(filename); + fd = open(filename, O_RDONLY | O_NOCTTY); + if (fd < 0 || fstat(fd, &st)) { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename); + close(fd); + goto out; + } + w->len = st.st_size; + w->mmap = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + if 
(!w->mmap) { + PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename); + close(fd); + goto out; + } + close(fd); + w->mmend = w->mmap + st.st_size; + + if (w->len < EI_NIDENT || memcmp(w->mmap, ELFMAG, SELFMAG)) { + PyErr_SetString(ELFFormatError, "invalid ELF signature"); + goto out; + } + + switch (w->mmap[EI_CLASS]) { + case ELFCLASS32: + w->elfclass = 32; + break; + case ELFCLASS64: + w->elfclass = 64; + break; + default: + PyErr_SetString(ELFFormatError, "invalid ELF class"); + goto out; + } + switch (w->mmap[EI_DATA]) { + case ELFDATA2LSB: + w->bigendian = false; + break; + case ELFDATA2MSB: + w->bigendian = true; + break; + default: + PyErr_SetString(ELFFormatError, "invalid ELF byte order"); + goto out; + } + + w->elf = elf_memory(w->mmap, w->len); + if (!w->elf) + goto out_elferr; + w->ehdr = gelf_getehdr(w->elf, &w->_ehdr); + if (!w->ehdr) + goto out_elferr; + + for (size_t i = 0; i < w->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(w->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + if (shdr->sh_type == SHT_SYMTAB) { + w->symtab = scn; + w->nsym = shdr->sh_size / shdr->sh_entsize; + w->symdata = elf_getdata(scn, NULL); + w->symstridx = shdr->sh_link; + break; + } + } + w->has_symbols = w->symtab && w->symstridx; + elfrelocs_init(&w->dynrelocs); + +#ifdef HAVE_ELF_GETDATA_RAWCHUNK + for (size_t i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + + if (phdr->p_type != PT_DYNAMIC) + continue; + + Elf_Data *dyndata = elf_getdata_rawchunk(w->elf, + phdr->p_offset, phdr->p_filesz, ELF_T_DYN); + + GElf_Addr dynrela = 0, dynrel = 0, symtab = 0, strtab = 0; + size_t dynrelasz = 0, dynrelaent = 0; + size_t dynrelsz = 0, dynrelent = 0; + size_t strsz = 0; + GElf_Dyn _dyn, *dyn; + + for (size_t j = 0;; j++) { + dyn = gelf_getdyn(dyndata, j, &_dyn); + + if (dyn->d_tag == DT_NULL) + break; + + switch (dyn->d_tag) { + case DT_SYMTAB: + symtab = dyn->d_un.d_ptr; + break; + + case DT_STRTAB: + strtab = 
dyn->d_un.d_ptr; + break; + case DT_STRSZ: + strsz = dyn->d_un.d_val; + break; + + case DT_RELA: + dynrela = dyn->d_un.d_ptr; + break; + case DT_RELASZ: + dynrelasz = dyn->d_un.d_val; + break; + case DT_RELAENT: + dynrelaent = dyn->d_un.d_val; + break; + + case DT_REL: + dynrel = dyn->d_un.d_ptr; + break; + case DT_RELSZ: + dynrelsz = dyn->d_un.d_val; + break; + case DT_RELENT: + dynrelent = dyn->d_un.d_val; + break; + } + } + + GElf_Addr offset; + Elf_Data *symdata = NULL, *strdata = NULL; + + if (elffile_virt2file(w, symtab, &offset)) + symdata = elf_getdata_rawchunk(w->elf, offset, + w->len - offset, + ELF_T_SYM); + if (elffile_virt2file(w, strtab, &offset)) + strdata = elf_getdata_rawchunk(w->elf, offset, + strsz, ELF_T_BYTE); + + size_t c; + + if (dynrela && dynrelasz && dynrelaent + && elffile_virt2file(w, dynrela, &offset)) { + Elf_Data *reladata = NULL; + + debugf("dynrela @%llx/%llx+%llx\n", (long long)dynrela, + (long long)offset, (long long)dynrelasz); + + reladata = elf_getdata_rawchunk(w->elf, offset, + dynrelasz, ELF_T_RELA); + + c = dynrelasz / dynrelaent; + elffile_add_dynreloc(w, reladata, c, symdata, strdata, + ELF_T_RELA); + } + + if (dynrel && dynrelsz && dynrelent + && elffile_virt2file(w, dynrel, &offset)) { + Elf_Data *reldata = NULL; + + debugf("dynrel @%llx/%llx+%llx\n", (long long)dynrel, + (long long)offset, (long long)dynrelsz); + + reldata = elf_getdata_rawchunk(w->elf, offset, dynrelsz, + ELF_T_REL); + + c = dynrelsz / dynrelent; + elffile_add_dynreloc(w, reldata, c, symdata, strdata, + ELF_T_REL); + } + } +#endif + + w->sects = calloc(sizeof(PyObject *), w->ehdr->e_shnum); + w->n_sect = w->ehdr->e_shnum; + + return (PyObject *)w; + +out_elferr: + err = elf_errno(); + + PyErr_Format(ELFFormatError, "libelf error %d: %s", + err, elf_errmsg(err)); +out: + if (w->elf) + elf_end(w->elf); + free(w->filename); + return NULL; +} + +static PyObject *elfpy_debug(PyObject *self, PyObject *args) +{ + int arg; + + if (!PyArg_ParseTuple(args, "p", 
&arg)) + return NULL; + + debug = arg; + + Py_RETURN_NONE; +} + +static PyMethodDef methods_elfpy[] = { + {"elfpy_debug", elfpy_debug, METH_VARARGS, "switch debuging on/off"}, + {} +}; + +bool elf_py_init(PyObject *pymod) +{ + if (PyType_Ready(&typeobj_elffile) < 0) + return false; + if (PyType_Ready(&typeobj_elfsect) < 0) + return false; + if (PyType_Ready(&typeobj_elfreloc) < 0) + return false; + if (elf_version(EV_CURRENT) == EV_NONE) + return false; + +#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 5 + PyModule_AddFunctions(pymod, methods_elfpy); +#else + (void)methods_elfpy; +#endif + + ELFFormatError = PyErr_NewException("_clippy.ELFFormatError", + PyExc_ValueError, NULL); + PyModule_AddObject(pymod, "ELFFormatError", ELFFormatError); + ELFAccessError = PyErr_NewException("_clippy.ELFAccessError", + PyExc_IndexError, NULL); + PyModule_AddObject(pymod, "ELFAccessError", ELFAccessError); + + Py_INCREF(&typeobj_elffile); + PyModule_AddObject(pymod, "ELFFile", (PyObject *)&typeobj_elffile); + Py_INCREF(&typeobj_elfsect); + PyModule_AddObject(pymod, "ELFSection", (PyObject *)&typeobj_elfsect); + Py_INCREF(&typeobj_elfreloc); + PyModule_AddObject(pymod, "ELFReloc", (PyObject *)&typeobj_elfreloc); + return true; +} |