From 43a97878ce14b72f0981164f87f2e35e14151312 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 11:22:09 +0200 Subject: Adding upstream version 110.0.1. Signed-off-by: Daniel Baumann --- build/unix/elfhack/elfhack.cpp | 1450 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1450 insertions(+) create mode 100644 build/unix/elfhack/elfhack.cpp (limited to 'build/unix/elfhack/elfhack.cpp') diff --git a/build/unix/elfhack/elfhack.cpp b/build/unix/elfhack/elfhack.cpp new file mode 100644 index 0000000000..6771cffa6e --- /dev/null +++ b/build/unix/elfhack/elfhack.cpp @@ -0,0 +1,1450 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#undef NDEBUG +#include +#include +#include +#include +#include + +#include "elfxx.h" +#include "mozilla/CheckedInt.h" + +#define ver "1" +#define elfhack_data ".elfhack.data.v" ver +#define elfhack_text ".elfhack.text.v" ver + +#ifndef R_ARM_V4BX +# define R_ARM_V4BX 0x28 +#endif +#ifndef R_ARM_CALL +# define R_ARM_CALL 0x1c +#endif +#ifndef R_ARM_JUMP24 +# define R_ARM_JUMP24 0x1d +#endif +#ifndef R_ARM_THM_JUMP24 +# define R_ARM_THM_JUMP24 0x1e +#endif + +char* rundir = nullptr; + +template +struct wrapped { + T value; +}; + +class Elf_Addr_Traits { + public: + typedef wrapped Type32; + typedef wrapped Type64; + + template + static inline void swap(T& t, R& r) { + r.value = endian::swap(t.value); + } +}; + +typedef serializable Elf_Addr; + +class ElfRelHack_Section : public ElfSection { + public: + ElfRelHack_Section(Elf_Shdr& s) + : ElfSection(s, nullptr, nullptr), + block_size((8 * s.sh_entsize - 1) * s.sh_entsize) { + name = elfhack_data; + }; + + void serialize(std::ofstream& file, unsigned char ei_class, + unsigned char ei_data) { + for (std::vector::iterator i = relr.begin(); i != relr.end(); + ++i) { + Elf_Addr out; + out.value = *i; + out.serialize(file, ei_class, ei_data); + } + } + + bool isRelocatable() { return true; } + + void push_back(Elf64_Addr offset) { + // The format used for the packed relocations is SHT_RELR, described in + // https://groups.google.com/g/generic-abi/c/bX460iggiKg/m/Jnz1lgLJAgAJ + // The gist of it is that an address is recorded, and the following words, + // if their LSB is 1, represent a bitmap of word-size-spaced relocations + // at the addresses that follow. There can be multiple such bitmaps, such + // that very long streaks of (possibly spaced) relocations can be recorded + // in a very compact way. + for (;;) { + // [block_start; block_start + block_size] represents the range of offsets + // the current bitmap can record. If the offset doesn't fall in that + // range, or if doesn't align properly to be recorded, we record the + // bitmap, and slide the block corresponding to a new bitmap. If the + // offset doesn't fall in the range for the new bitmap, or if there wasn't + // an active bitmap in the first place, we record the offset and start a + // new bitmap for the block that follows it. + if (!block_start || offset < block_start || + offset >= block_start + block_size || + (offset - block_start) % shdr.sh_entsize) { + if (bitmap) { + relr.push_back((bitmap << 1) | 1); + block_start += block_size; + bitmap = 0; + continue; + } + relr.push_back(offset); + block_start = offset + shdr.sh_entsize; + break; + } + bitmap |= 1ULL << ((offset - block_start) / shdr.sh_entsize); + break; + } + shdr.sh_size = relr.size() * shdr.sh_entsize; + } + + private: + std::vector relr; + size_t block_size; + Elf64_Addr block_start = 0; + Elf64_Addr bitmap = 0; +}; + +class ElfRelHackCode_Section : public ElfSection { + public: + ElfRelHackCode_Section(Elf_Shdr& s, Elf& e, + ElfRelHack_Section& relhack_section, unsigned int init, + unsigned int mprotect_cb, unsigned int sysconf_cb) + : ElfSection(s, nullptr, nullptr), + parent(e), + relhack_section(relhack_section), + init(init), + init_trampoline(nullptr), + mprotect_cb(mprotect_cb), + sysconf_cb(sysconf_cb) { + std::string file(rundir); + file += "/inject/"; + switch (parent.getMachine()) { + case EM_386: + file += "x86"; + break; + case EM_X86_64: + file += "x86_64"; + break; + case EM_ARM: + file += "arm"; + break; + case EM_AARCH64: + file += "aarch64"; + break; + default: + throw std::runtime_error("unsupported architecture"); + } + file += ".o"; + std::ifstream inject(file.c_str(), std::ios::in | std::ios::binary); + elf = new Elf(inject); + if (elf->getType() != ET_REL) + throw std::runtime_error("object for injected code is not ET_REL"); + if (elf->getMachine() != parent.getMachine()) + throw std::runtime_error( + "architecture of object for injected code doesn't match"); + + ElfSymtab_Section* symtab = nullptr; + + // Find the symbol table. + for (ElfSection* section = elf->getSection(1); section != nullptr; + section = section->getNext()) { + if (section->getType() == SHT_SYMTAB) + symtab = (ElfSymtab_Section*)section; + } + if (symtab == nullptr) + throw std::runtime_error( + "Couldn't find a symbol table for the injected code"); + + relro = parent.getSegmentByType(PT_GNU_RELRO); + + // Find the init symbol + entry_point = -1; + std::string symbol = "init"; + if (!init) symbol += "_noinit"; + if (relro) symbol += "_relro"; + Elf_SymValue* sym = symtab->lookup(symbol.c_str()); + if (!sym) + throw std::runtime_error( + "Couldn't find an 'init' symbol in the injected code"); + + entry_point = sym->value.getValue(); + + // Get all relevant sections from the injected code object. + add_code_section(sym->value.getSection()); + + // If the original init function is located too far away, we're going to + // need to use a trampoline. See comment in inject.c. + // Theoretically, we should check for (init - instr) > 0xffffff, where instr + // is the virtual address of the instruction that calls the original init, + // but we don't have it at this point, so punt to just init. + if (init > 0xffffff && parent.getMachine() == EM_ARM) { + Elf_SymValue* trampoline = symtab->lookup("init_trampoline"); + if (!trampoline) { + throw std::runtime_error( + "Couldn't find an 'init_trampoline' symbol in the injected code"); + } + + init_trampoline = trampoline->value.getSection(); + add_code_section(init_trampoline); + } + + // Adjust code sections offsets according to their size + std::vector::iterator c = code.begin(); + (*c)->getShdr().sh_addr = 0; + for (ElfSection* last = *(c++); c != code.end(); ++c) { + unsigned int addr = last->getShdr().sh_addr + last->getSize(); + if (addr & ((*c)->getAddrAlign() - 1)) + addr = (addr | ((*c)->getAddrAlign() - 1)) + 1; + (*c)->getShdr().sh_addr = addr; + // We need to align this section depending on the greater + // alignment required by code sections. + if (shdr.sh_addralign < (*c)->getAddrAlign()) + shdr.sh_addralign = (*c)->getAddrAlign(); + last = *c; + } + shdr.sh_size = code.back()->getAddr() + code.back()->getSize(); + data = static_cast(malloc(shdr.sh_size)); + if (!data) { + throw std::runtime_error("Could not malloc ElfSection data"); + } + char* buf = data; + for (c = code.begin(); c != code.end(); ++c) { + memcpy(buf, (*c)->getData(), (*c)->getSize()); + buf += (*c)->getSize(); + } + name = elfhack_text; + } + + ~ElfRelHackCode_Section() { delete elf; } + + void serialize(std::ofstream& file, unsigned char ei_class, + unsigned char ei_data) override { + // Readjust code offsets + for (std::vector::iterator c = code.begin(); c != code.end(); + ++c) + (*c)->getShdr().sh_addr += getAddr(); + + // Apply relocations + for (std::vector::iterator c = code.begin(); c != code.end(); + ++c) { + for (ElfSection* rel = elf->getSection(1); rel != nullptr; + rel = rel->getNext()) + if (((rel->getType() == SHT_REL) || (rel->getType() == SHT_RELA)) && + (rel->getInfo().section == *c)) { + if (rel->getType() == SHT_REL) + apply_relocations((ElfRel_Section*)rel, *c); + else + apply_relocations((ElfRel_Section*)rel, *c); + } + } + + ElfSection::serialize(file, ei_class, ei_data); + } + + bool isRelocatable() override { return false; } + + unsigned int getEntryPoint() { return entry_point; } + + void insertBefore(ElfSection* section, bool dirty = true) override { + // Adjust the address so that this section is adjacent to the one it's + // being inserted before. This avoids creating holes which subsequently + // might lead the PHDR-adjusting code to create unnecessary additional + // PT_LOADs. + shdr.sh_addr = + (section->getAddr() - shdr.sh_size) & ~(shdr.sh_addralign - 1); + ElfSection::insertBefore(section, dirty); + } + + private: + void add_code_section(ElfSection* section) { + if (section) { + /* Don't add section if it's already been added in the past */ + for (auto s = code.begin(); s != code.end(); ++s) { + if (section == *s) return; + } + code.push_back(section); + find_code(section); + } + } + + /* Look at the relocations associated to the given section to find other + * sections that it requires */ + void find_code(ElfSection* section) { + for (ElfSection* s = elf->getSection(1); s != nullptr; s = s->getNext()) { + if (((s->getType() == SHT_REL) || (s->getType() == SHT_RELA)) && + (s->getInfo().section == section)) { + if (s->getType() == SHT_REL) + scan_relocs_for_code((ElfRel_Section*)s); + else + scan_relocs_for_code((ElfRel_Section*)s); + } + } + } + + template + void scan_relocs_for_code(ElfRel_Section* rel) { + ElfSymtab_Section* symtab = (ElfSymtab_Section*)rel->getLink(); + for (auto r = rel->rels.begin(); r != rel->rels.end(); ++r) { + ElfSection* section = + symtab->syms[ELF64_R_SYM(r->r_info)].value.getSection(); + add_code_section(section); + } + } + + // TODO: sort out which non-aarch64 relocation types should be using + // `value` (even though in practice it's either 0 or the same as addend) + class pc32_relocation { + public: + Elf32_Addr operator()(unsigned int base_addr, Elf64_Off offset, + Elf64_Sxword addend, unsigned int addr, + Elf64_Word value) { + return addr + addend - offset - base_addr; + } + }; + + class arm_plt32_relocation { + public: + Elf32_Addr operator()(unsigned int base_addr, Elf64_Off offset, + Elf64_Sxword addend, unsigned int addr, + Elf64_Word value) { + // We don't care about sign_extend because the only case where this is + // going to be used only jumps forward. + Elf32_Addr tmp = (Elf32_Addr)(addr - offset - base_addr) >> 2; + tmp = (addend + tmp) & 0x00ffffff; + return (addend & 0xff000000) | tmp; + } + }; + + class arm_thm_jump24_relocation { + public: + Elf32_Addr operator()(unsigned int base_addr, Elf64_Off offset, + Elf64_Sxword addend, unsigned int addr, + Elf64_Word value) { + /* Follows description of b.w and bl instructions as per + ARM Architecture Reference Manual ARM® v7-A and ARM® v7-R edition, + A8.6.16 We limit ourselves to Encoding T4 of b.w and Encoding T1 of bl. + We don't care about sign_extend because the only case where this is + going to be used only jumps forward. */ + Elf32_Addr tmp = (Elf32_Addr)(addr - offset - base_addr); + unsigned int word0 = addend & 0xffff, word1 = addend >> 16; + + /* Encoding T4 of B.W is 10x1 ; Encoding T1 of BL is 11x1. */ + unsigned int type = (word1 & 0xd000) >> 12; + if (((word0 & 0xf800) != 0xf000) || ((type & 0x9) != 0x9)) + throw std::runtime_error( + "R_ARM_THM_JUMP24/R_ARM_THM_CALL relocation only supported for B.W " + "