diff options
Diffstat (limited to 'mozglue/linker')
-rw-r--r-- | mozglue/linker/BaseElf.cpp | 190 | ||||
-rw-r--r-- | mozglue/linker/BaseElf.h | 128 | ||||
-rw-r--r-- | mozglue/linker/CustomElf.cpp | 680 | ||||
-rw-r--r-- | mozglue/linker/CustomElf.h | 147 | ||||
-rw-r--r-- | mozglue/linker/ElfLoader.cpp | 1360 | ||||
-rw-r--r-- | mozglue/linker/ElfLoader.h | 634 | ||||
-rw-r--r-- | mozglue/linker/Elfxx.h | 246 | ||||
-rw-r--r-- | mozglue/linker/Linker.h | 24 | ||||
-rw-r--r-- | mozglue/linker/Logging.cpp | 7 | ||||
-rw-r--r-- | mozglue/linker/Logging.h | 72 | ||||
-rw-r--r-- | mozglue/linker/Mappable.cpp | 376 | ||||
-rw-r--r-- | mozglue/linker/Mappable.h | 161 | ||||
-rw-r--r-- | mozglue/linker/Utils.h | 532 | ||||
-rw-r--r-- | mozglue/linker/XZStream.cpp | 221 | ||||
-rw-r--r-- | mozglue/linker/XZStream.h | 49 | ||||
-rw-r--r-- | mozglue/linker/Zip.cpp | 277 | ||||
-rw-r--r-- | mozglue/linker/Zip.h | 388 | ||||
-rw-r--r-- | mozglue/linker/moz.build | 33 | ||||
-rw-r--r-- | mozglue/linker/tests/TestZip.cpp | 61 | ||||
-rw-r--r-- | mozglue/linker/tests/TestZipData.S | 17 | ||||
-rw-r--r-- | mozglue/linker/tests/moz.build | 20 | ||||
-rw-r--r-- | mozglue/linker/tests/no_central_dir.zip | bin | 0 -> 281 bytes | |||
-rw-r--r-- | mozglue/linker/tests/test.zip | bin | 0 -> 574 bytes |
23 files changed, 5623 insertions, 0 deletions
diff --git a/mozglue/linker/BaseElf.cpp b/mozglue/linker/BaseElf.cpp new file mode 100644 index 0000000000..78542b3875 --- /dev/null +++ b/mozglue/linker/BaseElf.cpp @@ -0,0 +1,190 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "BaseElf.h" +#include "Elfxx.h" +#include "Logging.h" +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/RefPtr.h" + +using namespace Elf; + +unsigned long BaseElf::Hash(const char* symbol) { + const unsigned char* sym = reinterpret_cast<const unsigned char*>(symbol); + unsigned long h = 0, g; + while (*sym) { + h = (h << 4) + *sym++; + g = h & 0xf0000000; + h ^= g; + h ^= g >> 24; + } + return h; +} + +void* BaseElf::GetSymbolPtr(const char* symbol) const { + return GetSymbolPtr(symbol, Hash(symbol)); +} + +void* BaseElf::GetSymbolPtr(const char* symbol, unsigned long hash) const { + const Sym* sym = GetSymbol(symbol, hash); + void* ptr = nullptr; + if (sym && sym->st_shndx != SHN_UNDEF) ptr = GetPtr(sym->st_value); + DEBUG_LOG("BaseElf::GetSymbolPtr(%p [\"%s\"], \"%s\") = %p", + reinterpret_cast<const void*>(this), GetPath(), symbol, ptr); + return ptr; +} + +const Sym* BaseElf::GetSymbol(const char* symbol, unsigned long hash) const { + /* Search symbol with the buckets and chains tables. + * The hash computed from the symbol name gives an index in the buckets + * table. The corresponding value in the bucket table is an index in the + * symbols table and in the chains table. + * If the corresponding symbol in the symbols table matches, we're done. 
+ * Otherwise, the corresponding value in the chains table is a new index + * in both tables, which corresponding symbol is tested and so on and so + * forth */ + size_t bucket = hash % buckets.numElements(); + for (size_t y = buckets[bucket]; y != STN_UNDEF; y = chains[y]) { + if (strcmp(symbol, strtab.GetStringAt(symtab[y].st_name))) continue; + return &symtab[y]; + } + return nullptr; +} + +bool BaseElf::Contains(void* addr) const { return base.Contains(addr); } + +#ifdef __ARM_EABI__ +const void* BaseElf::FindExidx(int* pcount) const { + if (arm_exidx) { + *pcount = arm_exidx.numElements(); + return arm_exidx; + } + *pcount = 0; + return nullptr; +} +#endif + +already_AddRefed<LibHandle> LoadedElf::Create(const char* path, + void* base_addr) { + DEBUG_LOG("LoadedElf::Create(\"%s\", %p) = ...", path, base_addr); + + uint8_t mapped; + /* If the page is not mapped, mincore returns an error. If base_addr is + * nullptr, as would happen if the corresponding binary is prelinked with + * the prelink look (but not with the android apriori tool), no page being + * mapped there (right?), mincore returns an error, too, which makes + * prelinked libraries on glibc unsupported. This is not an interesting + * use case for now, so don't try supporting that case. + */ + if (mincore(const_cast<void*>(base_addr), PageSize(), &mapped)) + return nullptr; + + RefPtr<LoadedElf> elf = new LoadedElf(path); + + const Ehdr* ehdr = Ehdr::validate(base_addr); + if (!ehdr) return nullptr; + + Addr min_vaddr = (Addr)-1; // We want to find the lowest and biggest + Addr max_vaddr = 0; // virtual address used by this Elf. 
+ const Phdr* dyn = nullptr; +#ifdef __ARM_EABI__ + const Phdr* arm_exidx_phdr = nullptr; +#endif + + Array<Phdr> phdrs(reinterpret_cast<const char*>(ehdr) + ehdr->e_phoff, + ehdr->e_phnum); + for (auto phdr = phdrs.begin(); phdr < phdrs.end(); ++phdr) { + switch (phdr->p_type) { + case PT_LOAD: + if (phdr->p_vaddr < min_vaddr) min_vaddr = phdr->p_vaddr; + if (max_vaddr < phdr->p_vaddr + phdr->p_memsz) + max_vaddr = phdr->p_vaddr + phdr->p_memsz; + break; + case PT_DYNAMIC: + dyn = &*phdr; + break; +#ifdef __ARM_EABI__ + case PT_ARM_EXIDX: + /* We cannot initialize arm_exidx here + because we don't have a base yet */ + arm_exidx_phdr = &*phdr; + break; +#endif + } + } + + /* If the lowest PT_LOAD virtual address in headers is not 0, then the ELF + * is either prelinked or a non-PIE executable. The former case is not + * possible, because base_addr would be nullptr and the mincore test above + * would already have made us return. + * For a non-PIE executable, PT_LOADs contain absolute addresses, so in + * practice, this means min_vaddr should be equal to base_addr. max_vaddr + * can thus be adjusted accordingly. 
+ */ + if (min_vaddr != 0) { + void* min_vaddr_ptr = + reinterpret_cast<void*>(static_cast<uintptr_t>(min_vaddr)); + if (min_vaddr_ptr != base_addr) { + LOG("%s: %p != %p", elf->GetPath(), min_vaddr_ptr, base_addr); + return nullptr; + } + max_vaddr -= min_vaddr; + } + if (!dyn) { + LOG("%s: No PT_DYNAMIC segment found", elf->GetPath()); + return nullptr; + } + + elf->base.Assign(base_addr, max_vaddr); + + if (!elf->InitDyn(dyn)) return nullptr; + +#ifdef __ARM_EABI__ + if (arm_exidx_phdr) + elf->arm_exidx.InitSize(elf->GetPtr(arm_exidx_phdr->p_vaddr), + arm_exidx_phdr->p_memsz); +#endif + + DEBUG_LOG("LoadedElf::Create(\"%s\", %p) = %p", path, base_addr, + static_cast<void*>(elf)); + + ElfLoader::Singleton.Register(elf); + return elf.forget(); +} + +bool LoadedElf::InitDyn(const Phdr* pt_dyn) { + Array<Dyn> dyns; + dyns.InitSize(GetPtr<Dyn>(pt_dyn->p_vaddr), pt_dyn->p_filesz); + + size_t symnum = 0; + for (auto dyn = dyns.begin(); dyn < dyns.end() && dyn->d_tag; ++dyn) { + switch (dyn->d_tag) { + case DT_HASH: { + DEBUG_LOG("%s 0x%08" PRIxPTR, "DT_HASH", uintptr_t(dyn->d_un.d_val)); + const Elf::Word* hash_table_header = GetPtr<Elf::Word>(dyn->d_un.d_ptr); + symnum = hash_table_header[1]; + buckets.Init(&hash_table_header[2], hash_table_header[0]); + chains.Init(&*buckets.end()); + } break; + case DT_STRTAB: + DEBUG_LOG("%s 0x%08" PRIxPTR, "DT_STRTAB", uintptr_t(dyn->d_un.d_val)); + strtab.Init(GetPtr(dyn->d_un.d_ptr)); + break; + case DT_SYMTAB: + DEBUG_LOG("%s 0x%08" PRIxPTR, "DT_SYMTAB", uintptr_t(dyn->d_un.d_val)); + symtab.Init(GetPtr(dyn->d_un.d_ptr)); + break; + } + } + if (!buckets || !symnum) { + ERROR("%s: Missing or broken DT_HASH", GetPath()); + } else if (!strtab) { + ERROR("%s: Missing DT_STRTAB", GetPath()); + } else if (!symtab) { + ERROR("%s: Missing DT_SYMTAB", GetPath()); + } else { + return true; + } + return false; +} diff --git a/mozglue/linker/BaseElf.h b/mozglue/linker/BaseElf.h new file mode 100644 index 0000000000..9569dbc579 --- 
/dev/null +++ b/mozglue/linker/BaseElf.h @@ -0,0 +1,128 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef BaseElf_h +#define BaseElf_h + +#include "ElfLoader.h" +#include "Elfxx.h" + +/** + * Base class for ELF libraries. This class includes things that will be + * common between SystemElfs and CustomElfs. + */ +class BaseElf : public LibHandle { + public: + /** + * Hash function for symbol lookup, as defined in ELF standard for System V. + */ + static unsigned long Hash(const char* symbol); + + /** + * Returns the address corresponding to the given symbol name (with a + * pre-computed hash). + */ + void* GetSymbolPtr(const char* symbol, unsigned long hash) const; + + /** + * Returns a pointer to the Elf Symbol in the Dynamic Symbol table + * corresponding to the given symbol name (with a pre-computed hash). + */ + const Elf::Sym* GetSymbol(const char* symbol, unsigned long hash) const; + + explicit BaseElf(const char* path, Mappable* mappable = nullptr) + : LibHandle(path), mappable(mappable) {} + + protected: + /** + * Inherited from LibHandle. Those are temporary and are not supposed to + * be used. + */ + virtual void* GetSymbolPtr(const char* symbol) const; + virtual bool Contains(void* addr) const; + virtual void* GetBase() const { return GetPtr(0); } + +#ifdef __ARM_EABI__ + virtual const void* FindExidx(int* pcount) const; +#endif + + virtual Mappable* GetMappable() const { return NULL; }; + + public: + /* private: */ + /** + * Returns a pointer relative to the base address where the library is + * loaded. 
+ */ + void* GetPtr(const Elf::Addr offset) const { + if (reinterpret_cast<void*>(offset) > base) + return reinterpret_cast<void*>(offset); + return base + offset; + } + + /** + * Like the above, but returns a typed (const) pointer + */ + template <typename T> + const T* GetPtr(const Elf::Addr offset) const { + if (reinterpret_cast<void*>(offset) > base) + return reinterpret_cast<const T*>(offset); + return reinterpret_cast<const T*>(base + offset); + } + + /* Appropriated Mappable */ + /* /!\ we rely on this being nullptr for BaseElf instances, but not + * CustomElf instances. */ + RefPtr<Mappable> mappable; + + /* Base address where the library is loaded */ + MappedPtr base; + + /* Buckets and chains for the System V symbol hash table */ + Array<Elf::Word> buckets; + UnsizedArray<Elf::Word> chains; + + /* protected: */ + /* String table */ + Elf::Strtab strtab; + + /* Symbol table */ + UnsizedArray<Elf::Sym> symtab; + +#ifdef __ARM_EABI__ + /* ARM.exidx information used by FindExidx */ + Array<uint32_t[2]> arm_exidx; +#endif +}; + +/** + * Class for ELF libraries that already loaded in memory. + */ +class LoadedElf : public BaseElf { + public: + /** + * Returns a LoadedElf corresponding to the already loaded ELF + * at the given base address. + */ + static already_AddRefed<LibHandle> Create(const char* path, void* base_addr); + + private: + explicit LoadedElf(const char* path) : BaseElf(path) {} + + ~LoadedElf() { + /* Avoid base's destructor unmapping something that doesn't actually + * belong to it. */ + base.release(); + ElfLoader::Singleton.Forget(this); + } + + /** + * Initializes the library according to information found in the given + * PT_DYNAMIC header. + * Returns whether this succeeded or failed. 
+ */ + bool InitDyn(const Elf::Phdr* pt_dyn); +}; + +#endif /* BaseElf_h */ diff --git a/mozglue/linker/CustomElf.cpp b/mozglue/linker/CustomElf.cpp new file mode 100644 index 0000000000..5d44b34d22 --- /dev/null +++ b/mozglue/linker/CustomElf.cpp @@ -0,0 +1,680 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <cstring> +#include <sys/mman.h> +#include <vector> +#include <dlfcn.h> +#include <signal.h> +#include <string.h> +#include "CustomElf.h" +#include "BaseElf.h" +#include "Mappable.h" +#include "Logging.h" +#include "mozilla/IntegerPrintfMacros.h" + +using namespace Elf; + +/* TODO: Fill ElfLoader::Singleton.lastError on errors. */ + +const Ehdr* Ehdr::validate(const void* buf) { + if (!buf || buf == MAP_FAILED) return nullptr; + + const Ehdr* ehdr = reinterpret_cast<const Ehdr*>(buf); + + /* Only support ELF executables or libraries for the host system */ + if (memcmp(ELFMAG, &ehdr->e_ident, SELFMAG) || + ehdr->e_ident[EI_CLASS] != ELFCLASS || + ehdr->e_ident[EI_DATA] != ELFDATA || ehdr->e_ident[EI_VERSION] != 1 || + (ehdr->e_ident[EI_OSABI] != ELFOSABI && + ehdr->e_ident[EI_OSABI] != ELFOSABI_NONE) || +#ifdef EI_ABIVERSION + ehdr->e_ident[EI_ABIVERSION] != ELFABIVERSION || +#endif + (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) || + ehdr->e_machine != ELFMACHINE || ehdr->e_version != 1 || + ehdr->e_phentsize != sizeof(Phdr)) + return nullptr; + + return ehdr; +} + +namespace { + +void debug_phdr(const char* type, const Phdr* phdr) { + DEBUG_LOG("%s @0x%08" PRIxPTR + " (" + "filesz: 0x%08" PRIxPTR + ", " + "memsz: 0x%08" PRIxPTR + ", " + "offset: 0x%08" PRIxPTR + ", " + "flags: %c%c%c)", + type, uintptr_t(phdr->p_vaddr), uintptr_t(phdr->p_filesz), + uintptr_t(phdr->p_memsz), uintptr_t(phdr->p_offset), + phdr->p_flags & PF_R ? 'r' : '-', phdr->p_flags & PF_W ? 
'w' : '-', + phdr->p_flags & PF_X ? 'x' : '-'); +} + +static int p_flags_to_mprot(Word flags) { + return ((flags & PF_X) ? PROT_EXEC : 0) | ((flags & PF_W) ? PROT_WRITE : 0) | + ((flags & PF_R) ? PROT_READ : 0); +} + +} /* anonymous namespace */ + +/** + * RAII wrapper for a mapping of the first page off a Mappable object. + * This calls Mappable::munmap instead of system munmap. + */ +class Mappable1stPagePtr : public GenericMappedPtr<Mappable1stPagePtr> { + public: + explicit Mappable1stPagePtr(Mappable* mappable) + : GenericMappedPtr<Mappable1stPagePtr>( + mappable->mmap(nullptr, PageSize(), PROT_READ, MAP_PRIVATE, 0)), + mappable(mappable) {} + + private: + friend class GenericMappedPtr<Mappable1stPagePtr>; + void munmap(void* buf, size_t length) { mappable->munmap(buf, length); } + + RefPtr<Mappable> mappable; +}; + +already_AddRefed<LibHandle> CustomElf::Load(Mappable* mappable, + const char* path, int flags) { + DEBUG_LOG("CustomElf::Load(\"%s\", 0x%x) = ...", path, flags); + if (!mappable) return nullptr; + /* Keeping a RefPtr of the CustomElf is going to free the appropriate + * resources when returning nullptr */ + RefPtr<CustomElf> elf = new CustomElf(mappable, path); + /* Map the first page of the Elf object to access Elf and program headers */ + Mappable1stPagePtr ehdr_raw(mappable); + if (ehdr_raw == MAP_FAILED) return nullptr; + + const Ehdr* ehdr = Ehdr::validate(ehdr_raw); + if (!ehdr) return nullptr; + + /* Scan Elf Program Headers and gather some information about them */ + std::vector<const Phdr*> pt_loads; + Addr min_vaddr = (Addr)-1; // We want to find the lowest and biggest + Addr max_vaddr = 0; // virtual address used by this Elf. 
+ const Phdr* dyn = nullptr; + + const Phdr* first_phdr = reinterpret_cast<const Phdr*>( + reinterpret_cast<const char*>(ehdr) + ehdr->e_phoff); + const Phdr* end_phdr = &first_phdr[ehdr->e_phnum]; +#ifdef __ARM_EABI__ + const Phdr* arm_exidx_phdr = nullptr; +#endif + + for (const Phdr* phdr = first_phdr; phdr < end_phdr; phdr++) { + switch (phdr->p_type) { + case PT_LOAD: + debug_phdr("PT_LOAD", phdr); + pt_loads.push_back(phdr); + if (phdr->p_vaddr < min_vaddr) min_vaddr = phdr->p_vaddr; + if (max_vaddr < phdr->p_vaddr + phdr->p_memsz) + max_vaddr = phdr->p_vaddr + phdr->p_memsz; + break; + case PT_DYNAMIC: + debug_phdr("PT_DYNAMIC", phdr); + if (!dyn) { + dyn = phdr; + } else { + ERROR("%s: Multiple PT_DYNAMIC segments detected", elf->GetPath()); + return nullptr; + } + break; + case PT_TLS: + debug_phdr("PT_TLS", phdr); + if (phdr->p_memsz) { + ERROR("%s: TLS is not supported", elf->GetPath()); + return nullptr; + } + break; + case PT_GNU_STACK: + debug_phdr("PT_GNU_STACK", phdr); +// Skip on Android until bug 706116 is fixed +#ifndef ANDROID + if (phdr->p_flags & PF_X) { + ERROR("%s: Executable stack is not supported", elf->GetPath()); + return nullptr; + } +#endif + break; +#ifdef __ARM_EABI__ + case PT_ARM_EXIDX: + /* We cannot initialize arm_exidx here + because we don't have a base yet */ + arm_exidx_phdr = phdr; + break; +#endif + default: + DEBUG_LOG("%s: Program header type #%d not handled", elf->GetPath(), + phdr->p_type); + } + } + + if (min_vaddr != 0) { + ERROR("%s: Unsupported minimal virtual address: 0x%08" PRIxPTR, + elf->GetPath(), uintptr_t(min_vaddr)); + return nullptr; + } + if (!dyn) { + ERROR("%s: No PT_DYNAMIC segment found", elf->GetPath()); + return nullptr; + } + + /* Reserve enough memory to map the complete virtual address space for this + * library. + * As we are using the base address from here to mmap something else with + * MAP_FIXED | MAP_SHARED, we need to make sure these mmaps will work. 
For + * instance, on armv6, MAP_SHARED mappings require a 16k alignment, but mmap + * MAP_PRIVATE only returns a 4k aligned address. So we first get a base + * address with MAP_SHARED, which guarantees the kernel returns an address + * that we'll be able to use with MAP_FIXED, and then remap MAP_PRIVATE at + * the same address, because of some bad side effects of keeping it as + * MAP_SHARED. */ + elf->base.Assign(MemoryRange::mmap(nullptr, max_vaddr, PROT_NONE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0)); + if ((elf->base == MAP_FAILED) || + (mmap(elf->base, max_vaddr, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != elf->base)) { + ERROR("%s: Failed to mmap", elf->GetPath()); + return nullptr; + } + + /* Load and initialize library */ + for (std::vector<const Phdr*>::iterator it = pt_loads.begin(); + it < pt_loads.end(); ++it) + if (!elf->LoadSegment(*it)) return nullptr; + + /* We're not going to mmap anymore */ + mappable->finalize(); + + elf->l_addr = elf->base; + elf->l_name = elf->GetPath(); + elf->l_ld = elf->GetPtr<Dyn>(dyn->p_vaddr); + ElfLoader::Singleton.Register(elf); + + if (!elf->InitDyn(dyn)) return nullptr; + + if (elf->has_text_relocs) { + for (std::vector<const Phdr*>::iterator it = pt_loads.begin(); + it < pt_loads.end(); ++it) + mprotect(PageAlignedPtr(elf->GetPtr((*it)->p_vaddr)), + PageAlignedEndPtr((*it)->p_memsz), + p_flags_to_mprot((*it)->p_flags) | PROT_WRITE); + } + + if (!elf->Relocate() || !elf->RelocateJumps()) return nullptr; + + if (elf->has_text_relocs) { + for (std::vector<const Phdr*>::iterator it = pt_loads.begin(); + it < pt_loads.end(); ++it) + mprotect(PageAlignedPtr(elf->GetPtr((*it)->p_vaddr)), + PageAlignedEndPtr((*it)->p_memsz), + p_flags_to_mprot((*it)->p_flags)); + } + + if (!elf->CallInit()) return nullptr; + +#ifdef __ARM_EABI__ + if (arm_exidx_phdr) + elf->arm_exidx.InitSize(elf->GetPtr(arm_exidx_phdr->p_vaddr), + arm_exidx_phdr->p_memsz); +#endif + + DEBUG_LOG("CustomElf::Load(\"%s\", 0x%x) = %p", path, 
flags, + static_cast<void*>(elf)); + return elf.forget(); +} + +CustomElf::~CustomElf() { + DEBUG_LOG("CustomElf::~CustomElf(%p [\"%s\"])", reinterpret_cast<void*>(this), + GetPath()); + CallFini(); + /* Normally, __cxa_finalize is called by the .fini function. However, + * Android NDK before r6b doesn't do that. Our wrapped cxa_finalize only + * calls destructors once, so call it in all cases. */ + ElfLoader::__wrap_cxa_finalize(this); + ElfLoader::Singleton.Forget(this); +} + +void* CustomElf::GetSymbolPtrInDeps(const char* symbol) const { + /* Resolve dlopen and related functions to point to ours */ + if (symbol[0] == 'd' && symbol[1] == 'l') { + if (strcmp(symbol + 2, "open") == 0) return FunctionPtr(__wrap_dlopen); + if (strcmp(symbol + 2, "error") == 0) return FunctionPtr(__wrap_dlerror); + if (strcmp(symbol + 2, "close") == 0) return FunctionPtr(__wrap_dlclose); + if (strcmp(symbol + 2, "sym") == 0) return FunctionPtr(__wrap_dlsym); + if (strcmp(symbol + 2, "addr") == 0) return FunctionPtr(__wrap_dladdr); + if (strcmp(symbol + 2, "_iterate_phdr") == 0) + return FunctionPtr(__wrap_dl_iterate_phdr); + } else if (symbol[0] == '_' && symbol[1] == '_') { + /* Resolve a few C++ ABI specific functions to point to ours */ +#ifdef __ARM_EABI__ + if (strcmp(symbol + 2, "aeabi_atexit") == 0) + return FunctionPtr(&ElfLoader::__wrap_aeabi_atexit); +#else + if (strcmp(symbol + 2, "cxa_atexit") == 0) + return FunctionPtr(&ElfLoader::__wrap_cxa_atexit); +#endif + if (strcmp(symbol + 2, "cxa_finalize") == 0) + return FunctionPtr(&ElfLoader::__wrap_cxa_finalize); + if (strcmp(symbol + 2, "dso_handle") == 0) + return const_cast<CustomElf*>(this); +#ifdef __ARM_EABI__ + if (strcmp(symbol + 2, "gnu_Unwind_Find_exidx") == 0) + return FunctionPtr(__wrap___gnu_Unwind_Find_exidx); +#endif + } else if (symbol[0] == 's' && symbol[1] == 'i') { + if (strcmp(symbol + 2, "gnal") == 0) return FunctionPtr(signal); + if (strcmp(symbol + 2, "gaction") == 0) return FunctionPtr(sigaction); + } 
+ + void* sym; + + unsigned long hash = Hash(symbol); + + /* self_elf should never be NULL, but better safe than sorry. */ + if (ElfLoader::Singleton.self_elf) { + /* We consider the library containing this code a permanent LD_PRELOAD, + * so, check if the symbol exists here first. */ + sym = static_cast<BaseElf*>(ElfLoader::Singleton.self_elf.get()) + ->GetSymbolPtr(symbol, hash); + if (sym) return sym; + } + + /* Then search the symbol in our dependencies. Since we already searched in + * libraries the system linker loaded, skip those (on glibc systems). We + * also assume the symbol is to be found in one of the dependent libraries + * directly, not in their own dependent libraries. Building libraries with + * --no-allow-shlib-undefined ensures such indirect symbol dependency don't + * happen. */ + for (std::vector<RefPtr<LibHandle> >::const_iterator it = + dependencies.begin(); + it < dependencies.end(); ++it) { + /* Skip if it's the library containing this code, since we've already + * looked at it above. */ + if (*it == ElfLoader::Singleton.self_elf) continue; + if (BaseElf* be = (*it)->AsBaseElf()) { + sym = be->GetSymbolPtr(symbol, hash); + } else { + sym = (*it)->GetSymbolPtr(symbol); + } + if (sym) return sym; + } + return nullptr; +} + +bool CustomElf::LoadSegment(const Phdr* pt_load) const { + if (pt_load->p_type != PT_LOAD) { + DEBUG_LOG("%s: Elf::LoadSegment only takes PT_LOAD program headers", + GetPath()); + return false; + ; + } + + int prot = p_flags_to_mprot(pt_load->p_flags); + + /* Mmap at page boundary */ + Addr align = PageSize(); + Addr align_offset; + void *mapped, *where; + do { + align_offset = pt_load->p_vaddr - AlignedPtr(pt_load->p_vaddr, align); + where = GetPtr(pt_load->p_vaddr - align_offset); + DEBUG_LOG("%s: Loading segment @%p %c%c%c", GetPath(), where, + prot & PROT_READ ? 'r' : '-', prot & PROT_WRITE ? 'w' : '-', + prot & PROT_EXEC ? 
'x' : '-'); + mapped = mappable->mmap(where, pt_load->p_filesz + align_offset, prot, + MAP_PRIVATE | MAP_FIXED, + pt_load->p_offset - align_offset); + if ((mapped != MAP_FAILED) || (pt_load->p_vaddr == 0) || + (pt_load->p_align == align)) + break; + /* The virtual address space for the library is properly aligned at + * 16k on ARMv6 (see CustomElf::Load), and so is the first segment + * (p_vaddr == 0). But subsequent segments may not be 16k aligned + * and fail to mmap. In such case, try to mmap again at the p_align + * boundary instead of page boundary. */ + DEBUG_LOG("%s: Failed to mmap, retrying", GetPath()); + align = pt_load->p_align; + } while (1); + + if (mapped != where) { + if (mapped == MAP_FAILED) { + ERROR("%s: Failed to mmap", GetPath()); + } else { + ERROR("%s: Didn't map at the expected location (wanted: %p, got: %p)", + GetPath(), where, mapped); + } + return false; + } + + /* When p_memsz is greater than p_filesz, we need to have nulled out memory + * after p_filesz and before p_memsz. + * Above the end of the last page, and up to p_memsz, we already have nulled + * out memory because we mapped anonymous memory on the whole library virtual + * address space. We just need to adjust this anonymous memory protection + * flags. 
*/ + if (pt_load->p_memsz > pt_load->p_filesz) { + Addr file_end = pt_load->p_vaddr + pt_load->p_filesz; + Addr mem_end = pt_load->p_vaddr + pt_load->p_memsz; + Addr next_page = PageAlignedEndPtr(file_end); + if (next_page > file_end) { + void* ptr = GetPtr(file_end); + memset(ptr, 0, next_page - file_end); + } + if (mem_end > next_page) { + if (mprotect(GetPtr(next_page), mem_end - next_page, prot) < 0) { + ERROR("%s: Failed to mprotect", GetPath()); + return false; + } + } + } + return true; +} + +namespace { + +void debug_dyn(const char* type, const Dyn* dyn) { + DEBUG_LOG("%s 0x%08" PRIxPTR, type, uintptr_t(dyn->d_un.d_val)); +} + +} /* anonymous namespace */ + +bool CustomElf::InitDyn(const Phdr* pt_dyn) { + /* Scan PT_DYNAMIC segment and gather some information */ + const Dyn* first_dyn = GetPtr<Dyn>(pt_dyn->p_vaddr); + const Dyn* end_dyn = GetPtr<Dyn>(pt_dyn->p_vaddr + pt_dyn->p_filesz); + std::vector<Word> dt_needed; + size_t symnum = 0; + for (const Dyn* dyn = first_dyn; dyn < end_dyn && dyn->d_tag; dyn++) { + switch (dyn->d_tag) { + case DT_NEEDED: + debug_dyn("DT_NEEDED", dyn); + dt_needed.push_back(dyn->d_un.d_val); + break; + case DT_HASH: { + debug_dyn("DT_HASH", dyn); + const Word* hash_table_header = GetPtr<Word>(dyn->d_un.d_ptr); + symnum = hash_table_header[1]; + buckets.Init(&hash_table_header[2], hash_table_header[0]); + chains.Init(&*buckets.end()); + } break; + case DT_STRTAB: + debug_dyn("DT_STRTAB", dyn); + strtab.Init(GetPtr(dyn->d_un.d_ptr)); + break; + case DT_SYMTAB: + debug_dyn("DT_SYMTAB", dyn); + symtab.Init(GetPtr(dyn->d_un.d_ptr)); + break; + case DT_SYMENT: + debug_dyn("DT_SYMENT", dyn); + if (dyn->d_un.d_val != sizeof(Sym)) { + ERROR("%s: Unsupported DT_SYMENT", GetPath()); + return false; + } + break; + case DT_TEXTREL: + if (strcmp("libflashplayer.so", GetName()) == 0) { + has_text_relocs = true; + } else { + ERROR("%s: Text relocations are not supported", GetPath()); + return false; + } + break; + case DT_STRSZ: /* Ignored */ + 
debug_dyn("DT_STRSZ", dyn); + break; + case UNSUPPORTED_RELOC(): + case UNSUPPORTED_RELOC(SZ): + case UNSUPPORTED_RELOC(ENT): + ERROR("%s: Unsupported relocations", GetPath()); + return false; + case RELOC(): + debug_dyn(STR_RELOC(), dyn); + relocations.Init(GetPtr(dyn->d_un.d_ptr)); + break; + case RELOC(SZ): + debug_dyn(STR_RELOC(SZ), dyn); + relocations.InitSize(dyn->d_un.d_val); + break; + case RELOC(ENT): + debug_dyn(STR_RELOC(ENT), dyn); + if (dyn->d_un.d_val != sizeof(Reloc)) { + ERROR("%s: Unsupported DT_RELENT", GetPath()); + return false; + } + break; + case DT_JMPREL: + debug_dyn("DT_JMPREL", dyn); + jumprels.Init(GetPtr(dyn->d_un.d_ptr)); + break; + case DT_PLTRELSZ: + debug_dyn("DT_PLTRELSZ", dyn); + jumprels.InitSize(dyn->d_un.d_val); + break; + case DT_PLTGOT: + debug_dyn("DT_PLTGOT", dyn); + break; + case DT_INIT: + debug_dyn("DT_INIT", dyn); + init = dyn->d_un.d_ptr; + break; + case DT_INIT_ARRAY: + debug_dyn("DT_INIT_ARRAY", dyn); + init_array.Init(GetPtr(dyn->d_un.d_ptr)); + break; + case DT_INIT_ARRAYSZ: + debug_dyn("DT_INIT_ARRAYSZ", dyn); + init_array.InitSize(dyn->d_un.d_val); + break; + case DT_FINI: + debug_dyn("DT_FINI", dyn); + fini = dyn->d_un.d_ptr; + break; + case DT_FINI_ARRAY: + debug_dyn("DT_FINI_ARRAY", dyn); + fini_array.Init(GetPtr(dyn->d_un.d_ptr)); + break; + case DT_FINI_ARRAYSZ: + debug_dyn("DT_FINI_ARRAYSZ", dyn); + fini_array.InitSize(dyn->d_un.d_val); + break; + case DT_PLTREL: + if (dyn->d_un.d_val != RELOC()) { + ERROR("%s: Error: DT_PLTREL is not " STR_RELOC(), GetPath()); + return false; + } + break; + case DT_FLAGS: { + Addr flags = dyn->d_un.d_val; + /* Treat as a DT_TEXTREL tag */ + if (flags & DF_TEXTREL) { + if (strcmp("libflashplayer.so", GetName()) == 0) { + has_text_relocs = true; + } else { + ERROR("%s: Text relocations are not supported", GetPath()); + return false; + } + } + /* we can treat this like having a DT_SYMBOLIC tag */ + flags &= ~DF_SYMBOLIC; + if (flags) + WARN("%s: unhandled flags #%" PRIxPTR " 
not handled", GetPath(), + uintptr_t(flags)); + } break; + case DT_SONAME: /* Should match GetName(), but doesn't matter */ + case DT_SYMBOLIC: /* Indicates internal symbols should be looked up in + * the library itself first instead of the executable, + * which is actually what this linker does by default */ + case RELOC(COUNT): /* Indicates how many relocations are relative, which + * is usually used to skip relocations on prelinked + * libraries. They are not supported anyways. */ + case UNSUPPORTED_RELOC(COUNT): /* This should error out, but it doesn't + * really matter. */ + case DT_FLAGS_1: /* Additional linker-internal flags that we don't care + * about. See DF_1_* values in src/include/elf/common.h + * in binutils. */ + case DT_VERSYM: /* DT_VER* entries are used for symbol versioning, which + */ + case DT_VERDEF: /* this linker doesn't support yet. */ + case DT_VERDEFNUM: + case DT_VERNEED: + case DT_VERNEEDNUM: + /* Ignored */ + break; + default: + WARN("%s: dynamic header type #%" PRIxPTR " not handled", GetPath(), + uintptr_t(dyn->d_tag)); + } + } + + if (!buckets || !symnum) { + ERROR("%s: Missing or broken DT_HASH", GetPath()); + return false; + } + if (!strtab) { + ERROR("%s: Missing DT_STRTAB", GetPath()); + return false; + } + if (!symtab) { + ERROR("%s: Missing DT_SYMTAB", GetPath()); + return false; + } + + /* Load dependent libraries */ + for (size_t i = 0; i < dt_needed.size(); i++) { + const char* name = strtab.GetStringAt(dt_needed[i]); + RefPtr<LibHandle> handle = + ElfLoader::Singleton.Load(name, RTLD_GLOBAL | RTLD_LAZY, this); + if (!handle) return false; + dependencies.push_back(handle); + } + + return true; +} + +bool CustomElf::Relocate() { + DEBUG_LOG("Relocate %s @%p", GetPath(), static_cast<void*>(base)); + uint32_t symtab_index = (uint32_t)-1; + void* symptr = nullptr; + for (Array<Reloc>::iterator rel = relocations.begin(); + rel < relocations.end(); ++rel) { + /* Location of the relocation */ + void* ptr = GetPtr(rel->r_offset); + 
+ /* R_*_RELATIVE relocations apply directly at the given location */ + if (ELF_R_TYPE(rel->r_info) == R_RELATIVE) { + *(void**)ptr = GetPtr(rel->GetAddend(base)); + continue; + } + /* Other relocation types need a symbol resolution */ + /* Avoid symbol resolution when it's the same symbol as last iteration */ + if (symtab_index != ELF_R_SYM(rel->r_info)) { + symtab_index = ELF_R_SYM(rel->r_info); + const Sym sym = symtab[symtab_index]; + if (sym.st_shndx != SHN_UNDEF) { + symptr = GetPtr(sym.st_value); + } else { + /* TODO: handle symbol resolving to nullptr vs. being undefined. */ + symptr = GetSymbolPtrInDeps(strtab.GetStringAt(sym.st_name)); + } + } + + if (symptr == nullptr) + WARN("%s: Relocation to NULL @0x%08" PRIxPTR, GetPath(), + uintptr_t(rel->r_offset)); + + /* Apply relocation */ + switch (ELF_R_TYPE(rel->r_info)) { + case R_GLOB_DAT: + /* R_*_GLOB_DAT relocations simply use the symbol value */ + *(void**)ptr = symptr; + break; + case R_ABS: + /* R_*_ABS* relocations add the relocation added to the symbol value */ + *(const char**)ptr = (const char*)symptr + rel->GetAddend(base); + break; + default: + ERROR("%s: Unsupported relocation type: 0x%" PRIxPTR, GetPath(), + uintptr_t(ELF_R_TYPE(rel->r_info))); + return false; + } + } + return true; +} + +bool CustomElf::RelocateJumps() { + /* TODO: Dynamic symbol resolution */ + for (Array<Reloc>::iterator rel = jumprels.begin(); rel < jumprels.end(); + ++rel) { + /* Location of the relocation */ + void* ptr = GetPtr(rel->r_offset); + + /* Only R_*_JMP_SLOT relocations are expected */ + if (ELF_R_TYPE(rel->r_info) != R_JMP_SLOT) { + ERROR("%s: Jump relocation type mismatch", GetPath()); + return false; + } + + /* TODO: Avoid code duplication with the relocations above */ + const Sym sym = symtab[ELF_R_SYM(rel->r_info)]; + void* symptr; + if (sym.st_shndx != SHN_UNDEF) + symptr = GetPtr(sym.st_value); + else + symptr = GetSymbolPtrInDeps(strtab.GetStringAt(sym.st_name)); + + if (symptr == nullptr) { + if 
(ELF_ST_BIND(sym.st_info) == STB_WEAK) { + WARN("%s: Relocation to NULL @0x%08" PRIxPTR " for symbol \"%s\"", + GetPath(), uintptr_t(rel->r_offset), + strtab.GetStringAt(sym.st_name)); + } else { + ERROR("%s: Relocation to NULL @0x%08" PRIxPTR " for symbol \"%s\"", + GetPath(), uintptr_t(rel->r_offset), + strtab.GetStringAt(sym.st_name)); + return false; + } + } + /* Apply relocation */ + *(void**)ptr = symptr; + } + return true; +} + +bool CustomElf::CallInit() { + if (init) CallFunction(init); + + for (Array<void*>::iterator it = init_array.begin(); it < init_array.end(); + ++it) { + /* Android x86 NDK wrongly puts 0xffffffff in INIT_ARRAY */ + if (*it && *it != reinterpret_cast<void*>(-1)) CallFunction(*it); + } + initialized = true; + return true; +} + +void CustomElf::CallFini() { + if (!initialized) return; + for (Array<void*>::reverse_iterator it = fini_array.rbegin(); + it < fini_array.rend(); ++it) { + /* Android x86 NDK wrongly puts 0xffffffff in FINI_ARRAY */ + if (*it && *it != reinterpret_cast<void*>(-1)) CallFunction(*it); + } + if (fini) CallFunction(fini); +} + +Mappable* CustomElf::GetMappable() const { + if (!mappable) return nullptr; + if (mappable->GetKind() == Mappable::MAPPABLE_EXTRACT_FILE) return mappable; + return ElfLoader::GetMappableFromPath(GetPath()); +} diff --git a/mozglue/linker/CustomElf.h b/mozglue/linker/CustomElf.h new file mode 100644 index 0000000000..f7b116e9d3 --- /dev/null +++ b/mozglue/linker/CustomElf.h @@ -0,0 +1,147 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef CustomElf_h +#define CustomElf_h + +#include "ElfLoader.h" +#include "BaseElf.h" +#include "Logging.h" +#include "Elfxx.h" + +/** + * Library Handle class for ELF libraries we don't let the system linker + * handle. 
 */
class CustomElf : public BaseElf, private ElfLoader::link_map {
  friend class ElfLoader;
  friend class SEGVHandler;

 public:
  /**
   * Returns a new CustomElf using the given file descriptor to map ELF
   * content. The file descriptor ownership is stolen, and it will be closed
   * in CustomElf's destructor if an instance is created, or by the Load
   * method otherwise. The path corresponds to the file descriptor, and flags
   * are the same kind of flags that would be given to dlopen(), though
   * currently, none are supported and the behaviour is more or less that of
   * RTLD_GLOBAL | RTLD_BIND_NOW.
   *
   * NOTE(review): the paragraph above speaks of a file descriptor, but the
   * signature takes a Mappable*; presumably the text predates the Mappable
   * abstraction -- confirm and reword.
   */
  static already_AddRefed<LibHandle> Load(Mappable* mappable, const char* path,
                                          int flags);

  /**
   * Inherited from LibHandle/BaseElf
   */
  virtual ~CustomElf();

 protected:
  /**
   * Returns the Mappable to use for this library, or nullptr when the
   * instance has none.
   */
  virtual Mappable* GetMappable() const;

 public:
  /**
   * Returns the instance, cast as BaseElf. (short of a better way to do
   * this without RTTI)
   */
  virtual BaseElf* AsBaseElf() { return this; }

 private:
  /**
   * Scan dependent libraries to find the address corresponding to the
   * given symbol name. This is used to find symbols that are undefined
   * in the Elf object.
   */
  void* GetSymbolPtrInDeps(const char* symbol) const;

  /**
   * Private constructor; instances are created through Load().
   * Zeroes the .init/.fini addresses and clears both state flags.
   */
  CustomElf(Mappable* mappable, const char* path)
      : BaseElf(path, mappable),
        link_map(),
        init(0),
        fini(0),
        initialized(false),
        has_text_relocs(false) {}

  /**
   * Loads an Elf segment defined by the given PT_LOAD header.
   * Returns whether this succeeded or failed.
   */
  bool LoadSegment(const Elf::Phdr* pt_load) const;

  /**
   * Initializes the library according to information found in the given
   * PT_DYNAMIC header.
   * Returns whether this succeeded or failed.
   */
  bool InitDyn(const Elf::Phdr* pt_dyn);

  /**
   * Apply .rel.dyn/.rela.dyn relocations.
   * Returns whether this succeeded or failed.
   */
  bool Relocate();

  /**
   * Apply .rel.plt/.rela.plt relocations.
   * Returns whether this succeeded or failed.
   */
  bool RelocateJumps();

  /**
   * Call initialization functions (.init/.init_array)
   * Returns true;
   */
  bool CallInit();

  /**
   * Call destructor functions (.fini_array/.fini).
   * Returns nothing.
   */
  void CallFini();

  /**
   * Call a function given a pointer to its location.
   */
  void CallFunction(void* ptr) const {
    /* C++ doesn't allow direct conversion between pointer-to-object
     * and pointer-to-function. */
    union {
      void* ptr;
      void (*func)(void);
    } f;
    f.ptr = ptr;
    DEBUG_LOG("%s: Calling function @%p", GetPath(), ptr);
    f.func();
  }

  /**
   * Call a function given an address relative to the library base
   */
  void CallFunction(Elf::Addr addr) const { return CallFunction(GetPtr(addr)); }

  /* List of dependent libraries */
  std::vector<RefPtr<LibHandle> > dependencies;

  /* List of .rel.dyn/.rela.dyn relocations */
  Array<Elf::Reloc> relocations;

  /* List of .rel.plt/.rela.plt relocation */
  Array<Elf::Reloc> jumprels;

  /* Relative address of the initialization and destruction functions
   * (.init/.fini) */
  Elf::Addr init, fini;

  /* List of initialization and destruction functions
   * (.init_array/.fini_array) */
  Array<void*> init_array, fini_array;

  /* Set by the constructor to false. */
  bool initialized;

  /* Set by the constructor to false.
   * NOTE(review): presumably flags the presence of text relocations (the
   * code that sets it is not visible here) -- confirm. */
  bool has_text_relocs;
};

#endif /* CustomElf_h */
diff --git a/mozglue/linker/ElfLoader.cpp b/mozglue/linker/ElfLoader.cpp
new file mode 100644
index 0000000000..55b113467a
--- /dev/null
+++ b/mozglue/linker/ElfLoader.cpp
@@ -0,0 +1,1360 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#include <string>
#include <cstring>
#include <cstdlib>
#include <cstdio>
#include <dlfcn.h>
#include <unistd.h>
#include <errno.h>
#include <algorithm>
#include <fcntl.h>
#include "ElfLoader.h"
#include "BaseElf.h"
#include "CustomElf.h"
#include "Mappable.h"
#include "Logging.h"
#include "Utils.h"
#include <inttypes.h>

// From Utils.h
mozilla::Atomic<size_t, mozilla::ReleaseAcquire> gPageSize;

#if defined(ANDROID)
#  include <sys/syscall.h>
#  include <sys/system_properties.h>
#  include <math.h>

#  include <android/api-level.h>

/**
 * Return the current Android version, or 0 on failure.
 *
 * The value is read from the "ro.build.version.sdk" system property and
 * cached in a function-local static once a non-zero value has been parsed;
 * while the result is still 0 the property is queried again on each call.
 */
static int GetAndroidSDKVersion() {
  static int version = 0;
  if (version) {
    return version;
  }

  char version_string[PROP_VALUE_MAX] = {'\0'};
  int len = __system_property_get("ro.build.version.sdk", version_string);
  if (len) {
    version = static_cast<int>(strtol(version_string, nullptr, 10));
  }
  return version;
}

#  if __ANDROID_API__ < 8
/* Android API < 8 doesn't provide sigaltstack */

extern "C" {

/* Thin shim that forwards straight to the raw system call. */
inline int sigaltstack(const stack_t* ss, stack_t* oss) {
  return syscall(__NR_sigaltstack, ss, oss);
}

} /* extern "C" */
#  endif /* __ANDROID_API__ */
#endif   /* ANDROID */

#ifdef __ARM_EABI__
/* Weak reference: callers must test the symbol for null before calling. */
extern "C" MOZ_EXPORT const void* __gnu_Unwind_Find_exidx(void* pc, int* pcount)
    __attribute__((weak));
#endif

/* Ideally we'd #include <link.h>, but that's a world of pain
 * Moreover, not all versions of android support it, so we need a weak
 * reference. */
extern "C" MOZ_EXPORT int dl_iterate_phdr(dl_phdr_cb callback, void* data)
    __attribute__((weak));

/* Pointer to the PT_DYNAMIC section of the executable or library
 * containing this code.
*/ +extern "C" Elf::Dyn _DYNAMIC[]; + +/** + * dlfcn.h replacements functions + */ + +void* __wrap_dlopen(const char* path, int flags) { +#if defined(ANDROID) + if (GetAndroidSDKVersion() >= 23) { + return dlopen(path, flags); + } +#endif + + RefPtr<LibHandle> handle = ElfLoader::Singleton.Load(path, flags); + if (handle) handle->AddDirectRef(); + return handle; +} + +const char* __wrap_dlerror(void) { +#if defined(ANDROID) + if (GetAndroidSDKVersion() >= 23) { + return dlerror(); + } +#endif + + const char* error = ElfLoader::Singleton.lastError.exchange(nullptr); + if (error) { + // Return a custom error if available. + return error; + } + // Or fallback to the system error. + return dlerror(); +} + +void* __wrap_dlsym(void* handle, const char* symbol) { +#if defined(ANDROID) + if (GetAndroidSDKVersion() >= 23) { + return dlsym(handle, symbol); + } +#endif + + if (!handle) { + ElfLoader::Singleton.lastError = "dlsym(NULL, sym) unsupported"; + return nullptr; + } + if (handle != RTLD_DEFAULT && handle != RTLD_NEXT) { + LibHandle* h = reinterpret_cast<LibHandle*>(handle); + return h->GetSymbolPtr(symbol); + } + + ElfLoader::Singleton.lastError = nullptr; // Use system dlerror. 
+ return dlsym(handle, symbol); +} + +int __wrap_dlclose(void* handle) { +#if defined(ANDROID) + if (GetAndroidSDKVersion() >= 23) { + return dlclose(handle); + } +#endif + + if (!handle) { + ElfLoader::Singleton.lastError = "No handle given to dlclose()"; + return -1; + } + reinterpret_cast<LibHandle*>(handle)->ReleaseDirectRef(); + return 0; +} + +int __wrap_dladdr(const void* addr, Dl_info* info) { +#if defined(ANDROID) + if (GetAndroidSDKVersion() >= 23) { + return dladdr(addr, info); + } +#endif + + RefPtr<LibHandle> handle = + ElfLoader::Singleton.GetHandleByPtr(const_cast<void*>(addr)); + if (!handle) { + return dladdr(addr, info); + } + info->dli_fname = handle->GetPath(); + info->dli_fbase = handle->GetBase(); + return 1; +} + +class DlIteratePhdrHelper { + public: + DlIteratePhdrHelper() { + int pipefd[2]; + valid_pipe = (pipe(pipefd) == 0); + read_fd.reset(pipefd[0]); + write_fd.reset(pipefd[1]); + } + + int fill_and_call(dl_phdr_cb callback, const void* l_addr, const char* l_name, + void* data); + + private: + bool valid_pipe; + AutoCloseFD read_fd; + AutoCloseFD write_fd; +}; + +// This function is called for each shared library iterated over by +// dl_iterate_phdr, and is used to fill a dl_phdr_info which is then +// sent through to the dl_iterate_phdr callback. +int DlIteratePhdrHelper::fill_and_call(dl_phdr_cb callback, const void* l_addr, + const char* l_name, void* data) { + dl_phdr_info info; + info.dlpi_addr = reinterpret_cast<Elf::Addr>(l_addr); + info.dlpi_name = l_name; + info.dlpi_phdr = nullptr; + info.dlpi_phnum = 0; + + // Assuming l_addr points to Elf headers (in most cases, this is true), + // get the Phdr location from there. + // Unfortunately, when l_addr doesn't point to Elf headers, it may point + // to unmapped memory, or worse, unreadable memory. 
The only way to detect + // the latter without causing a SIGSEGV is to use the pointer in a system + // call that will try to read from there, and return an EFAULT error if + // it can't. One such system call is write(). It used to be possible to + // use a file descriptor on /dev/null for these kind of things, but recent + // Linux kernels never return an EFAULT error when using /dev/null. + // So instead, we use a self pipe. We do however need to read() from the + // read end of the pipe as well so as to not fill up the pipe buffer and + // block on subsequent writes. + // In the unlikely event reads from or write to the pipe fail for some + // other reason than EFAULT, we don't try any further and just skip setting + // the Phdr location for all subsequent libraries, rather than trying to + // start over with a new pipe. + int can_read = true; + if (valid_pipe) { + int ret; + char raw_ehdr[sizeof(Elf::Ehdr)]; + static_assert(sizeof(raw_ehdr) < PIPE_BUF, "PIPE_BUF is too small"); + do { + // writes are atomic when smaller than PIPE_BUF, per POSIX.1-2008. + ret = write(write_fd, l_addr, sizeof(raw_ehdr)); + } while (ret == -1 && errno == EINTR); + if (ret != sizeof(raw_ehdr)) { + if (ret == -1 && errno == EFAULT) { + can_read = false; + } else { + valid_pipe = false; + } + } else { + size_t nbytes = 0; + do { + // Per POSIX.1-2008, interrupted reads can return a length smaller + // than the given one instead of failing with errno EINTR. 
+ ret = read(read_fd, raw_ehdr + nbytes, sizeof(raw_ehdr) - nbytes); + if (ret > 0) nbytes += ret; + } while ((nbytes != sizeof(raw_ehdr) && ret > 0) || + (ret == -1 && errno == EINTR)); + if (nbytes != sizeof(raw_ehdr)) { + valid_pipe = false; + } + } + } + + if (valid_pipe && can_read) { + const Elf::Ehdr* ehdr = Elf::Ehdr::validate(l_addr); + if (ehdr) { + info.dlpi_phdr = reinterpret_cast<const Elf::Phdr*>( + reinterpret_cast<const char*>(ehdr) + ehdr->e_phoff); + info.dlpi_phnum = ehdr->e_phnum; + } + } + + return callback(&info, sizeof(dl_phdr_info), data); +} + +int __wrap_dl_iterate_phdr(dl_phdr_cb callback, void* data) { +#if defined(ANDROID) + if (GetAndroidSDKVersion() >= 23) { + return dl_iterate_phdr(callback, data); + } +#endif + + DlIteratePhdrHelper helper; + AutoLock lock(&ElfLoader::Singleton.handlesMutex); + + if (dl_iterate_phdr) { + for (ElfLoader::LibHandleList::reverse_iterator it = + ElfLoader::Singleton.handles.rbegin(); + it < ElfLoader::Singleton.handles.rend(); ++it) { + BaseElf* elf = (*it)->AsBaseElf(); + if (!elf) { + continue; + } + int ret = helper.fill_and_call(callback, (*it)->GetBase(), + (*it)->GetPath(), data); + if (ret) return ret; + } + return dl_iterate_phdr(callback, data); + } + + /* For versions of Android that don't support dl_iterate_phdr (< 5.0), + * we go through the debugger helper data, which is known to be racy, but + * there's not much we can do about this :( . 
*/ + if (!ElfLoader::Singleton.dbg) return -1; + + for (ElfLoader::DebuggerHelper::iterator it = + ElfLoader::Singleton.dbg.begin(); + it < ElfLoader::Singleton.dbg.end(); ++it) { + int ret = helper.fill_and_call(callback, it->l_addr, it->l_name, data); + if (ret) return ret; + } + return 0; +} + +#ifdef __ARM_EABI__ +const void* __wrap___gnu_Unwind_Find_exidx(void* pc, int* pcount) { + RefPtr<LibHandle> handle = ElfLoader::Singleton.GetHandleByPtr(pc); + if (handle) return handle->FindExidx(pcount); + if (__gnu_Unwind_Find_exidx) return __gnu_Unwind_Find_exidx(pc, pcount); + *pcount = 0; + return nullptr; +} +#endif + +/** + * faulty.lib public API + */ + +MFBT_API size_t __dl_get_mappable_length(void* handle) { + if (!handle) return 0; + return reinterpret_cast<LibHandle*>(handle)->GetMappableLength(); +} + +MFBT_API void* __dl_mmap(void* handle, void* addr, size_t length, + off_t offset) { + if (!handle) return nullptr; + return reinterpret_cast<LibHandle*>(handle)->MappableMMap(addr, length, + offset); +} + +MFBT_API void __dl_munmap(void* handle, void* addr, size_t length) { + if (!handle) return; + return reinterpret_cast<LibHandle*>(handle)->MappableMUnmap(addr, length); +} + +MFBT_API bool IsSignalHandlingBroken() { + return ElfLoader::Singleton.isSignalHandlingBroken(); +} + +namespace { + +/** + * Returns the part after the last '/' for the given path + */ +const char* LeafName(const char* path) { + const char* lastSlash = strrchr(path, '/'); + if (lastSlash) return lastSlash + 1; + return path; +} + +/** + * Run the given lambda while holding the internal lock of the system linker. + * To take the lock, we call the system dl_iterate_phdr and invoke the lambda + * from the callback, which is called while the lock is held. Return true on + * success. + */ +template <class Lambda> +static bool RunWithSystemLinkerLock(Lambda&& aLambda) { + if (!dl_iterate_phdr) { + // No dl_iterate_phdr support. 
+ return false; + } + +#if defined(ANDROID) + if (GetAndroidSDKVersion() < 23) { + // dl_iterate_phdr is _not_ protected by a lock on Android < 23. + // Also return false here if we failed to get the version. + return false; + } +#endif + + dl_iterate_phdr( + [](dl_phdr_info*, size_t, void* lambda) -> int { + (*static_cast<Lambda*>(lambda))(); + // Return 1 to stop iterating. + return 1; + }, + &aLambda); + return true; +} + +} /* Anonymous namespace */ + +/** + * LibHandle + */ +LibHandle::~LibHandle() { free(path); } + +const char* LibHandle::GetName() const { + return path ? LeafName(path) : nullptr; +} + +size_t LibHandle::GetMappableLength() const { + if (!mappable) mappable = GetMappable(); + if (!mappable) return 0; + return mappable->GetLength(); +} + +void* LibHandle::MappableMMap(void* addr, size_t length, off_t offset) const { + if (!mappable) mappable = GetMappable(); + if (!mappable) return MAP_FAILED; + void* mapped = mappable->mmap(addr, length, PROT_READ, MAP_PRIVATE, offset); + return mapped; +} + +void LibHandle::MappableMUnmap(void* addr, size_t length) const { + if (mappable) mappable->munmap(addr, length); +} + +/** + * SystemElf + */ +already_AddRefed<LibHandle> SystemElf::Load(const char* path, int flags) { + /* The Android linker returns a handle when the file name matches an + * already loaded library, even when the full path doesn't exist */ + if (path && path[0] == '/' && (access(path, F_OK) == -1)) { + DEBUG_LOG("dlopen(\"%s\", 0x%x) = %p", path, flags, (void*)nullptr); + ElfLoader::Singleton.lastError = "Specified file does not exist"; + return nullptr; + } + + ElfLoader::Singleton.lastError = nullptr; // Use system dlerror. 
+ void* handle = dlopen(path, flags); + DEBUG_LOG("dlopen(\"%s\", 0x%x) = %p", path, flags, handle); + if (handle) { + SystemElf* elf = new SystemElf(path, handle); + ElfLoader::Singleton.Register(elf); + RefPtr<LibHandle> lib(elf); + return lib.forget(); + } + return nullptr; +} + +SystemElf::~SystemElf() { + if (!dlhandle) return; + DEBUG_LOG("dlclose(%p [\"%s\"])", dlhandle, GetPath()); + ElfLoader::Singleton.lastError = nullptr; // Use system dlerror. + dlclose(dlhandle); + ElfLoader::Singleton.Forget(this); +} + +void* SystemElf::GetSymbolPtr(const char* symbol) const { + ElfLoader::Singleton.lastError = nullptr; // Use system dlerror. + void* sym = dlsym(dlhandle, symbol); + DEBUG_LOG("dlsym(%p [\"%s\"], \"%s\") = %p", dlhandle, GetPath(), symbol, + sym); + return sym; +} + +Mappable* SystemElf::GetMappable() const { + const char* path = GetPath(); + if (!path) return nullptr; +#ifdef ANDROID + /* On Android, if we don't have the full path, try in /system/lib */ + const char* name = LeafName(path); + std::string systemPath; + if (name == path) { + systemPath = "/system/lib/"; + systemPath += path; + path = systemPath.c_str(); + } +#endif + + return MappableFile::Create(path); +} + +#ifdef __ARM_EABI__ +const void* SystemElf::FindExidx(int* pcount) const { + /* TODO: properly implement when ElfLoader::GetHandleByPtr + does return SystemElf handles */ + *pcount = 0; + return nullptr; +} +#endif + +/** + * ElfLoader + */ + +/* Unique ElfLoader instance */ +ElfLoader ElfLoader::Singleton; + +already_AddRefed<LibHandle> ElfLoader::Load(const char* path, int flags, + LibHandle* parent) { + /* Ensure logging is initialized or refresh if environment changed. */ + Logging::Init(); + + /* Ensure self_elf initialization. */ + if (!self_elf) Init(); + + RefPtr<LibHandle> handle; + + /* Handle dlopen(nullptr) directly. 
*/ + if (!path) { + handle = SystemElf::Load(nullptr, flags); + return handle.forget(); + } + + /* TODO: Handle relative paths correctly */ + const char* name = LeafName(path); + + /* Search the list of handles we already have for a match. When the given + * path is not absolute, compare file names, otherwise compare full paths. */ + if (name == path) { + AutoLock lock(&handlesMutex); + for (LibHandleList::iterator it = handles.begin(); it < handles.end(); ++it) + if ((*it)->GetName() && (strcmp((*it)->GetName(), name) == 0)) { + handle = *it; + return handle.forget(); + } + } else { + AutoLock lock(&handlesMutex); + for (LibHandleList::iterator it = handles.begin(); it < handles.end(); ++it) + if ((*it)->GetPath() && (strcmp((*it)->GetPath(), path) == 0)) { + handle = *it; + return handle.forget(); + } + } + + char* abs_path = nullptr; + const char* requested_path = path; + + /* When the path is not absolute and the library is being loaded for + * another, first try to load the library from the directory containing + * that parent library. */ + if ((name == path) && parent) { + const char* parentPath = parent->GetPath(); + abs_path = new char[strlen(parentPath) + strlen(path)]; + strcpy(abs_path, parentPath); + char* slash = strrchr(abs_path, '/'); + strcpy(slash + 1, path); + path = abs_path; + } + + Mappable* mappable = GetMappableFromPath(path); + + /* Try loading with the custom linker if we have a Mappable */ + if (mappable) handle = CustomElf::Load(mappable, path, flags); + + /* Try loading with the system linker if everything above failed */ + if (!handle) handle = SystemElf::Load(path, flags); + + /* If we didn't have an absolute path and haven't been able to load + * a library yet, try in the system search path */ + if (!handle && abs_path) handle = SystemElf::Load(name, flags); + + delete[] abs_path; + DEBUG_LOG("ElfLoader::Load(\"%s\", 0x%x, %p [\"%s\"]) = %p", requested_path, + flags, reinterpret_cast<void*>(parent), + parent ? 
parent->GetPath() : "", static_cast<void*>(handle)); + + return handle.forget(); +} + +already_AddRefed<LibHandle> ElfLoader::GetHandleByPtr(void* addr) { + AutoLock lock(&handlesMutex); + /* Scan the list of handles we already have for a match */ + for (LibHandleList::iterator it = handles.begin(); it < handles.end(); ++it) { + if ((*it)->Contains(addr)) { + RefPtr<LibHandle> lib = *it; + return lib.forget(); + } + } + return nullptr; +} + +Mappable* ElfLoader::GetMappableFromPath(const char* path) { + const char* name = LeafName(path); + Mappable* mappable = nullptr; + RefPtr<Zip> zip; + const char* subpath; + if ((subpath = strchr(path, '!'))) { + char* zip_path = strndup(path, subpath - path); + while (*(++subpath) == '/') { + } + zip = ZipCollection::GetZip(zip_path); + free(zip_path); + Zip::Stream s; + if (zip && zip->GetStream(subpath, &s)) { + /* When the MOZ_LINKER_EXTRACT environment variable is set to "1", + * compressed libraries are going to be (temporarily) extracted as + * files, in the directory pointed by the MOZ_LINKER_CACHE + * environment variable. */ + const char* extract = getenv("MOZ_LINKER_EXTRACT"); + if (extract && !strncmp(extract, "1", 2 /* Including '\0' */)) + mappable = MappableExtractFile::Create(name, zip, &s); + if (!mappable) { + if (s.GetType() == Zip::Stream::DEFLATE) { + mappable = MappableDeflate::Create(name, zip, &s); + } + } + } + } + /* If we couldn't load above, try with a MappableFile */ + if (!mappable && !zip) mappable = MappableFile::Create(path); + + return mappable; +} + +void ElfLoader::Register(LibHandle* handle) { + AutoLock lock(&handlesMutex); + handles.push_back(handle); +} + +void ElfLoader::Register(CustomElf* handle) { + Register(static_cast<LibHandle*>(handle)); + if (dbg) { + // We could race with the system linker when modifying the debug map, so + // only do so while holding the system linker's internal lock. 
+ RunWithSystemLinkerLock([this, handle] { dbg.Add(handle); }); + } +} + +void ElfLoader::Forget(LibHandle* handle) { + /* Ensure logging is initialized or refresh if environment changed. */ + Logging::Init(); + + AutoLock lock(&handlesMutex); + LibHandleList::iterator it = + std::find(handles.begin(), handles.end(), handle); + if (it != handles.end()) { + DEBUG_LOG("ElfLoader::Forget(%p [\"%s\"])", reinterpret_cast<void*>(handle), + handle->GetPath()); + handles.erase(it); + } else { + DEBUG_LOG("ElfLoader::Forget(%p [\"%s\"]): Handle not found", + reinterpret_cast<void*>(handle), handle->GetPath()); + } +} + +void ElfLoader::Forget(CustomElf* handle) { + Forget(static_cast<LibHandle*>(handle)); + if (dbg) { + // We could race with the system linker when modifying the debug map, so + // only do so while holding the system linker's internal lock. + RunWithSystemLinkerLock([this, handle] { dbg.Remove(handle); }); + } +} + +void ElfLoader::Init() { + Dl_info info; + /* On Android < 4.1 can't reenter dl* functions. So when the library + * containing this code is dlopen()ed, it can't call dladdr from a + * static initializer. */ + if (dladdr(_DYNAMIC, &info) != 0) { + self_elf = LoadedElf::Create(info.dli_fname, info.dli_fbase); + } +#if defined(ANDROID) + // On Android < 5.0, resolving weak symbols via dlsym doesn't work. + // The weak symbols Gecko uses are in either libc or libm, so we + // wrap those such that this linker does symbol resolution for them. 
+ if (GetAndroidSDKVersion() < 21) { + if (dladdr(FunctionPtr(syscall), &info) != 0) { + libc = LoadedElf::Create(info.dli_fname, info.dli_fbase); + } + if (dladdr(FunctionPtr<int (*)(double)>(isnan), &info) != 0) { + libm = LoadedElf::Create(info.dli_fname, info.dli_fbase); + } + } +#endif +} + +ElfLoader::~ElfLoader() { + LibHandleList list; + + if (!Singleton.IsShutdownExpected()) { + MOZ_CRASH("Unexpected shutdown"); + } + + /* Release self_elf and libc */ + self_elf = nullptr; +#if defined(ANDROID) + libc = nullptr; + libm = nullptr; +#endif + + AutoLock lock(&handlesMutex); + /* Build up a list of all library handles with direct (external) references. + * We actually skip system library handles because we want to keep at least + * some of these open. Most notably, Mozilla codebase keeps a few libgnome + * libraries deliberately open because of the mess that libORBit destruction + * is. dlclose()ing these libraries actually leads to problems. */ + for (LibHandleList::reverse_iterator it = handles.rbegin(); + it < handles.rend(); ++it) { + if ((*it)->DirectRefCount()) { + if (SystemElf* se = (*it)->AsSystemElf()) { + se->Forget(); + } else { + list.push_back(*it); + } + } + } + /* Force release all external references to the handles collected above */ + for (LibHandleList::iterator it = list.begin(); it < list.end(); ++it) { + while ((*it)->ReleaseDirectRef()) { + } + } + /* Remove the remaining system handles. 
*/ + if (handles.size()) { + list = handles; + for (LibHandleList::reverse_iterator it = list.rbegin(); it < list.rend(); + ++it) { + if ((*it)->AsSystemElf()) { + DEBUG_LOG( + "ElfLoader::~ElfLoader(): Remaining handle for \"%s\" " + "[%" PRIdPTR " direct refs, %" PRIdPTR " refs total]", + (*it)->GetPath(), (*it)->DirectRefCount(), (*it)->refCount()); + } else { + DEBUG_LOG( + "ElfLoader::~ElfLoader(): Unexpected remaining handle for \"%s\" " + "[%" PRIdPTR " direct refs, %" PRIdPTR " refs total]", + (*it)->GetPath(), (*it)->DirectRefCount(), (*it)->refCount()); + /* Not removing, since it could have references to other libraries, + * destroying them as a side effect, and possibly leaving dangling + * pointers in the handle list we're scanning */ + } + } + } + pthread_mutex_destroy(&handlesMutex); +} + +#ifdef __ARM_EABI__ +int ElfLoader::__wrap_aeabi_atexit(void* that, ElfLoader::Destructor destructor, + void* dso_handle) { + Singleton.destructors.push_back( + DestructorCaller(destructor, that, dso_handle)); + return 0; +} +#else +int ElfLoader::__wrap_cxa_atexit(ElfLoader::Destructor destructor, void* that, + void* dso_handle) { + Singleton.destructors.push_back( + DestructorCaller(destructor, that, dso_handle)); + return 0; +} +#endif + +void ElfLoader::__wrap_cxa_finalize(void* dso_handle) { + /* Call all destructors for the given DSO handle in reverse order they were + * registered. */ + std::vector<DestructorCaller>::reverse_iterator it; + for (it = Singleton.destructors.rbegin(); it < Singleton.destructors.rend(); + ++it) { + if (it->IsForHandle(dso_handle)) { + it->Call(); + } + } +} + +void ElfLoader::DestructorCaller::Call() { + if (destructor) { + DEBUG_LOG("ElfLoader::DestructorCaller::Call(%p, %p, %p)", + FunctionPtr(destructor), object, dso_handle); + destructor(object); + destructor = nullptr; + } +} + +ElfLoader::DebuggerHelper::DebuggerHelper() + : dbg(nullptr), firstAdded(nullptr) { + /* Find ELF auxiliary vectors. 
+ * + * The kernel stores the following data on the stack when starting a + * program: + * argc + * argv[0] (pointer into argv strings defined below) + * argv[1] (likewise) + * ... + * argv[argc - 1] (likewise) + * nullptr + * envp[0] (pointer into environment strings defined below) + * envp[1] (likewise) + * ... + * envp[n] (likewise) + * nullptr + * ... (more NULLs on some platforms such as Android 4.3) + * auxv[0] (first ELF auxiliary vector) + * auxv[1] (second ELF auxiliary vector) + * ... + * auxv[p] (last ELF auxiliary vector) + * (AT_NULL, nullptr) + * padding + * argv strings, separated with '\0' + * environment strings, separated with '\0' + * nullptr + * + * What we are after are the auxv values defined by the following struct. + */ + struct AuxVector { + Elf::Addr type; + Elf::Addr value; + }; + + /* Pointer to the environment variables list */ + extern char** environ; + + /* The environment may have changed since the program started, in which + * case the environ variables list isn't the list the kernel put on stack + * anymore. But in this new list, variables that didn't change still point + * to the strings the kernel put on stack. It is quite unlikely that two + * modified environment variables point to two consecutive strings in memory, + * so we assume that if two consecutive environment variables point to two + * consecutive strings, we found strings the kernel put on stack. */ + char** env; + for (env = environ; *env; env++) + if (*env + strlen(*env) + 1 == env[1]) break; + if (!*env) return; + + /* Next, we scan the stack backwards to find a pointer to one of those + * strings we found above, which will give us the location of the original + * envp list. As we are looking for pointers, we need to look at 32-bits or + * 64-bits aligned values, depening on the architecture. 
*/ + char** scan = reinterpret_cast<char**>(reinterpret_cast<uintptr_t>(*env) & + ~(sizeof(void*) - 1)); + while (*env != *scan) scan--; + + /* Finally, scan forward to find the last environment variable pointer and + * thus the first auxiliary vector. */ + while (*scan++) + ; + + /* Some platforms have more NULLs here, so skip them if we encounter them */ + while (!*scan) scan++; + + AuxVector* auxv = reinterpret_cast<AuxVector*>(scan); + + /* The two values of interest in the auxiliary vectors are AT_PHDR and + * AT_PHNUM, which gives us the the location and size of the ELF program + * headers. */ + Array<Elf::Phdr> phdrs; + char* base = nullptr; + while (auxv->type) { + if (auxv->type == AT_PHDR) { + phdrs.Init(reinterpret_cast<Elf::Phdr*>(auxv->value)); + /* Assume the base address is the first byte of the same page */ + base = reinterpret_cast<char*>(PageAlignedPtr(auxv->value)); + } + if (auxv->type == AT_PHNUM) phdrs.Init(auxv->value); + auxv++; + } + + if (!phdrs) { + DEBUG_LOG("Couldn't find program headers"); + return; + } + + /* In some cases, the address for the program headers we get from the + * auxiliary vectors is not mapped, because of the PT_LOAD segments + * definitions in the program executable. Trying to map anonymous memory + * with a hint giving the base address will return a different address + * if something is mapped there, and the base address otherwise. */ + MappedPtr mem(MemoryRange::mmap(base, PageSize(), PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + if (mem == base) { + /* If program headers aren't mapped, try to map them */ + int fd = open("/proc/self/exe", O_RDONLY); + if (fd == -1) { + DEBUG_LOG("Failed to open /proc/self/exe"); + return; + } + mem.Assign( + MemoryRange::mmap(base, PageSize(), PROT_READ, MAP_PRIVATE, fd, 0)); + /* If we don't manage to map at the right address, just give up. 
*/ + if (mem != base) { + DEBUG_LOG("Couldn't read program headers"); + return; + } + } + /* Sanity check: the first bytes at the base address should be an ELF + * header. */ + if (!Elf::Ehdr::validate(base)) { + DEBUG_LOG("Couldn't find program base"); + return; + } + + /* Search for the program PT_DYNAMIC segment */ + Array<Elf::Dyn> dyns; + for (Array<Elf::Phdr>::iterator phdr = phdrs.begin(); phdr < phdrs.end(); + ++phdr) { + /* While the program headers are expected within the first mapped page of + * the program executable, the executable PT_LOADs may actually make them + * loaded at an address that is not the wanted base address of the + * library. We thus need to adjust the base address, compensating for the + * virtual address of the PT_LOAD segment corresponding to offset 0. */ + if (phdr->p_type == PT_LOAD && phdr->p_offset == 0) base -= phdr->p_vaddr; + if (phdr->p_type == PT_DYNAMIC) + dyns.Init(base + phdr->p_vaddr, phdr->p_filesz); + } + if (!dyns) { + DEBUG_LOG("Failed to find PT_DYNAMIC section in program"); + return; + } + + /* Search for the DT_DEBUG information */ + for (Array<Elf::Dyn>::iterator dyn = dyns.begin(); dyn < dyns.end(); ++dyn) { + if (dyn->d_tag == DT_DEBUG) { + dbg = reinterpret_cast<r_debug*>(dyn->d_un.d_ptr); + break; + } + } + DEBUG_LOG("DT_DEBUG points at %p", static_cast<void*>(dbg)); +} + +/** + * Helper class to ensure the given pointer is writable within the scope of + * an instance. Permissions to the memory page where the pointer lies are + * restored to their original value when the instance is destroyed. 
+ */ +class EnsureWritable { + public: + template <typename T> + explicit EnsureWritable(T* ptr, size_t length_ = sizeof(T)) { + MOZ_ASSERT(length_ < PageSize()); + prot = -1; + page = MAP_FAILED; + + char* firstPage = PageAlignedPtr(reinterpret_cast<char*>(ptr)); + char* lastPageEnd = + PageAlignedEndPtr(reinterpret_cast<char*>(ptr) + length_); + length = lastPageEnd - firstPage; + uintptr_t start = reinterpret_cast<uintptr_t>(firstPage); + uintptr_t end; + + prot = getProt(start, &end); + if (prot == -1 || (start + length) > end) MOZ_CRASH(); + + if (prot & PROT_WRITE) { + success = true; + return; + } + + page = firstPage; + int ret = mprotect(page, length, prot | PROT_WRITE); + success = ret == 0; + if (!success) { + ERROR("mprotect(%p, %zu, %d) = %d (errno=%d; %s)", page, length, + prot | PROT_WRITE, ret, errno, strerror(errno)); + } + } + + bool IsWritable() const { return success; } + + ~EnsureWritable() { + if (success && page != MAP_FAILED) { + mprotect(page, length, prot); + } + } + + private: + int getProt(uintptr_t addr, uintptr_t* end) { + /* The interesting part of the /proc/self/maps format looks like: + * startAddr-endAddr rwxp */ + int result = 0; + AutoCloseFILE f(fopen("/proc/self/maps", "r")); + while (f) { + unsigned long long startAddr, endAddr; + char perms[5]; + if (fscanf(f, "%llx-%llx %4s %*1024[^\n] ", &startAddr, &endAddr, + perms) != 3) + return -1; + if (addr < startAddr || addr >= endAddr) continue; + if (perms[0] == 'r') + result |= PROT_READ; + else if (perms[0] != '-') + return -1; + if (perms[1] == 'w') + result |= PROT_WRITE; + else if (perms[1] != '-') + return -1; + if (perms[2] == 'x') + result |= PROT_EXEC; + else if (perms[2] != '-') + return -1; + *end = endAddr; + return result; + } + return -1; + } + + int prot; + void* page; + size_t length; + bool success; +}; + +/** + * The system linker maintains a doubly linked list of library it loads + * for use by the debugger. 
Unfortunately, it also uses the list pointers + * in a lot of operations and adding our data in the list is likely to + * trigger crashes when the linker tries to use data we don't provide or + * that fall off the amount data we allocated. Fortunately, the linker only + * traverses the list forward and accesses the head of the list from a + * private pointer instead of using the value in the r_debug structure. + * This means we can safely add members at the beginning of the list. + * Unfortunately, gdb checks the coherency of l_prev values, so we have + * to adjust the l_prev value for the first element the system linker + * knows about. Fortunately, it doesn't use l_prev, and the first element + * is not ever going to be released before our elements, since it is the + * program executable, so the system linker should not be changing + * r_debug::r_map. + */ +void ElfLoader::DebuggerHelper::Add(ElfLoader::link_map* map) { + if (!dbg->r_brk) return; + + dbg->r_state = r_debug::RT_ADD; + dbg->r_brk(); + + if (!firstAdded) { + /* When adding a library for the first time, r_map points to data + * handled by the system linker, and that data may be read-only */ + EnsureWritable w(&dbg->r_map->l_prev); + if (!w.IsWritable()) { + dbg->r_state = r_debug::RT_CONSISTENT; + dbg->r_brk(); + return; + } + + firstAdded = map; + dbg->r_map->l_prev = map; + } else + dbg->r_map->l_prev = map; + + map->l_prev = nullptr; + map->l_next = dbg->r_map; + + dbg->r_map = map; + dbg->r_state = r_debug::RT_CONSISTENT; + dbg->r_brk(); +} + +void ElfLoader::DebuggerHelper::Remove(ElfLoader::link_map* map) { + if (!dbg->r_brk) return; + + dbg->r_state = r_debug::RT_DELETE; + dbg->r_brk(); + + if (map == firstAdded) { + /* When removing the first added library, its l_next is going to be + * data handled by the system linker, and that data may be read-only */ + EnsureWritable w(&map->l_next->l_prev); + if (!w.IsWritable()) { + dbg->r_state = r_debug::RT_CONSISTENT; + dbg->r_brk(); + return; + } + 
+ firstAdded = map->l_prev; + map->l_next->l_prev = map->l_prev; + } else if (map->l_next) { + map->l_next->l_prev = map->l_prev; + } + + if (dbg->r_map == map) + dbg->r_map = map->l_next; + else if (map->l_prev) { + map->l_prev->l_next = map->l_next; + } + dbg->r_state = r_debug::RT_CONSISTENT; + dbg->r_brk(); +} + +#if defined(ANDROID) && defined(__NR_sigaction) +/* As some system libraries may be calling signal() or sigaction() to + * set a SIGSEGV handler, effectively breaking MappableSeekableZStream, + * or worse, restore our SIGSEGV handler with wrong flags (which using + * signal() will do), we want to hook into the system's sigaction() to + * replace it with our own wrapper instead, so that our handler is never + * replaced. We used to only do that with libraries this linker loads, + * but it turns out at least one system library does call signal() and + * breaks us (libsc-a3xx.so on the Samsung Galaxy S4). + * As libc's signal (bsd_signal/sysv_signal, really) calls sigaction + * under the hood, instead of calling the signal system call directly, + * we only need to hook sigaction. This is true for both bionic and + * glibc. + */ + +/* libc's sigaction */ +extern "C" int sigaction(int signum, const struct sigaction* act, + struct sigaction* oldact); + +/* Simple reimplementation of sigaction. This is roughly equivalent + * to the assembly that comes in bionic, but not quite equivalent to + * glibc's implementation, so we only use this on Android. */ +int sys_sigaction(int signum, const struct sigaction* act, + struct sigaction* oldact) { + return syscall(__NR_sigaction, signum, act, oldact); +} + +/* Replace the first instructions of the given function with a jump + * to the given new function. */ +template <typename T> +static bool Divert(T func, T new_func) { + void* ptr = FunctionPtr(func); + uintptr_t addr = reinterpret_cast<uintptr_t>(ptr); + +# if defined(__i386__) + // A 32-bit jump is a 5 bytes instruction. 
+ EnsureWritable w(ptr, 5); + *reinterpret_cast<unsigned char*>(addr) = 0xe9; // jmp + *reinterpret_cast<intptr_t*>(addr + 1) = + reinterpret_cast<uintptr_t>(new_func) - addr - 5; // target displacement + return true; +# elif defined(__arm__) || defined(__aarch64__) + const unsigned char trampoline[] = { +# ifdef __arm__ + // .thumb + 0x46, 0x04, // nop + 0x78, 0x47, // bx pc + 0x46, 0x04, // nop + // .arm + 0x04, 0xf0, 0x1f, 0xe5, // ldr pc, [pc, #-4] + // .word <new_func> +# else // __aarch64__ + 0x50, 0x00, + 0x00, 0x58, // ldr x16, [pc, #8] ; x16 (aka ip0) is the first + 0x00, 0x02, + 0x1f, 0xd6, // br x16 ; intra-procedure-call + // .word <new_func.lo> ; scratch register. + // .word <new_func.hi> +# endif + }; + const unsigned char* start; +# ifdef __arm__ + if (addr & 0x01) { + /* Function is thumb, the actual address of the code is without the + * least significant bit. */ + addr--; + /* The arm part of the trampoline needs to be 32-bit aligned */ + if (addr & 0x02) + start = trampoline; + else + start = trampoline + 2; + } else { + /* Function is arm, we only need the arm part of the trampoline */ + start = trampoline + 6; + } +# else // __aarch64__ + start = trampoline; +# endif + + size_t len = sizeof(trampoline) - (start - trampoline); + EnsureWritable w(reinterpret_cast<void*>(addr), len + sizeof(void*)); + memcpy(reinterpret_cast<void*>(addr), start, len); + *reinterpret_cast<void**>(addr + len) = FunctionPtr(new_func); + __builtin___clear_cache(reinterpret_cast<char*>(addr), + reinterpret_cast<char*>(addr + len + sizeof(void*))); + return true; +# else + return false; +# endif +} +#else +# define sys_sigaction sigaction +template <typename T> +static bool Divert(T func, T new_func) { + return false; +} +#endif + +namespace { + +/* Clock that only accounts for time spent in the current process. 
*/ +static uint64_t ProcessTimeStamp_Now() { + struct timespec ts; + int rv = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); + + if (rv != 0) { + return 0; + } + + uint64_t baseNs = (uint64_t)ts.tv_sec * 1000000000; + return baseNs + (uint64_t)ts.tv_nsec; +} + +} // namespace + +/* Data structure used to pass data to the temporary signal handler, + * as well as triggering a test crash. */ +struct TmpData { + volatile int crash_int; + volatile uint64_t crash_timestamp; +}; + +SEGVHandler::SEGVHandler() + : initialized(false), + registeredHandler(false), + signalHandlingBroken(true), + signalHandlingSlow(true) { + /* Ensure logging is initialized before the DEBUG_LOG in the test_handler. + * As this constructor runs before the ElfLoader constructor (by effect + * of ElfLoader inheriting from this class), this also initializes on behalf + * of ElfLoader and DebuggerHelper. */ + Logging::Init(); + + /* Initialize oldStack.ss_flags to an invalid value when used to set + * an alternative stack, meaning we haven't got information about the + * original alternative stack and thus don't mean to restore it in + * the destructor. */ + oldStack.ss_flags = SS_ONSTACK; + + /* Get the current segfault signal handler. */ + struct sigaction old_action; + sys_sigaction(SIGSEGV, nullptr, &old_action); + + /* Some devices don't provide useful information to their SIGSEGV handlers, + * making it impossible for on-demand decompression to work. To check if + * we're on such a device, setup a temporary handler and deliberately + * trigger a segfault. The handler will set signalHandlingBroken if the + * provided information is bogus. + * Some other devices have a kernel option enabled that makes SIGSEGV handler + * have an overhead so high that it affects how on-demand decompression + * performs. The handler will also set signalHandlingSlow if the triggered + * SIGSEGV took too much time. 
*/ + struct sigaction action; + action.sa_sigaction = &SEGVHandler::test_handler; + sigemptyset(&action.sa_mask); + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_restorer = nullptr; + stackPtr.Assign(MemoryRange::mmap(nullptr, PageSize(), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + if (stackPtr.get() == MAP_FAILED) return; + if (sys_sigaction(SIGSEGV, &action, nullptr)) return; + + TmpData* data = reinterpret_cast<TmpData*>(stackPtr.get()); + data->crash_timestamp = ProcessTimeStamp_Now(); + mprotect(stackPtr, stackPtr.GetLength(), PROT_NONE); + data->crash_int = 123; + /* Restore the original segfault signal handler. */ + sys_sigaction(SIGSEGV, &old_action, nullptr); + stackPtr.Assign(MAP_FAILED, 0); +} + +void SEGVHandler::FinishInitialization() { + /* Ideally, we'd need some locking here, but in practice, we're not + * going to race with another thread. */ + initialized = true; + + if (signalHandlingBroken || signalHandlingSlow) return; + + typedef int (*sigaction_func)(int, const struct sigaction*, + struct sigaction*); + + sigaction_func libc_sigaction; + +#if defined(ANDROID) + /* Android > 4.4 comes with a sigaction wrapper in a LD_PRELOADed library + * (libsigchain) for ART. That wrapper kind of does the same trick as we + * do, so we need extra care in handling it. + * - Divert the libc's sigaction, assuming the LD_PRELOADed library uses + * it under the hood (which is more or less true according to the source + * of that library, since it's doing a lookup in RTLD_NEXT) + * - With the LD_PRELOADed library in place, all calls to sigaction from + * from system libraries will go to the LD_PRELOADed library. + * - The LD_PRELOADed library calls to sigaction go to our __wrap_sigaction. + * - The calls to sigaction from libraries faulty.lib loads are sent to + * the LD_PRELOADed library. + * In practice, for signal handling, this means: + * - The signal handler registered to the kernel is ours. 
+ * - Our handler redispatches to the LD_PRELOADed library's if there's a + * segfault we don't handle. + * - The LD_PRELOADed library redispatches according to whatever system + * library or faulty.lib-loaded library set with sigaction. + * + * When there is no sigaction wrapper in place: + * - Divert the libc's sigaction. + * - Calls to sigaction from system library and faulty.lib-loaded libraries + * all go to the libc's sigaction, which end up in our __wrap_sigaction. + * - The signal handler registered to the kernel is ours. + * - Our handler redispatches according to whatever system library or + * faulty.lib-loaded library set with sigaction. + */ + void* libc = dlopen("libc.so", RTLD_GLOBAL | RTLD_LAZY); + if (libc) { + /* + * Lollipop bionic only has a small trampoline in sigaction, with the real + * work happening in __sigaction. Divert there instead of sigaction if it + * exists. Bug 1154803 + */ + libc_sigaction = + reinterpret_cast<sigaction_func>(dlsym(libc, "__sigaction")); + + if (!libc_sigaction) { + libc_sigaction = + reinterpret_cast<sigaction_func>(dlsym(libc, "sigaction")); + } + } else +#endif + { + libc_sigaction = sigaction; + } + + if (!Divert(libc_sigaction, __wrap_sigaction)) return; + + /* Setup an alternative stack if the already existing one is not big + * enough, or if there is none. 
*/ + if (sigaltstack(nullptr, &oldStack) == 0) { + if (oldStack.ss_flags == SS_ONSTACK) oldStack.ss_flags = 0; + if (!oldStack.ss_sp || oldStack.ss_size < stackSize) { + stackPtr.Assign(MemoryRange::mmap(nullptr, stackSize, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + if (stackPtr.get() == MAP_FAILED) return; + stack_t stack; + stack.ss_sp = stackPtr; + stack.ss_size = stackSize; + stack.ss_flags = 0; + if (sigaltstack(&stack, nullptr) != 0) return; + } + } + /* Register our own handler, and store the already registered one in + * SEGVHandler's struct sigaction member */ + action.sa_sigaction = &SEGVHandler::handler; + action.sa_flags = SA_SIGINFO | SA_NODEFER | SA_ONSTACK; + registeredHandler = !sys_sigaction(SIGSEGV, &action, &this->action); +} + +SEGVHandler::~SEGVHandler() { + /* Restore alternative stack for signals */ + if (oldStack.ss_flags != SS_ONSTACK) sigaltstack(&oldStack, nullptr); + /* Restore original signal handler */ + if (registeredHandler) sys_sigaction(SIGSEGV, &this->action, nullptr); +} + +/* Test handler for a deliberately triggered SIGSEGV that determines whether + * useful information is provided to signal handlers, particularly whether + * si_addr is filled in properly, and whether the segfault handler is called + * quickly enough. 
*/ +void SEGVHandler::test_handler(int signum, siginfo_t* info, void* context) { + SEGVHandler& that = ElfLoader::Singleton; + if (signum == SIGSEGV && info && info->si_addr == that.stackPtr.get()) + that.signalHandlingBroken = false; + mprotect(that.stackPtr, that.stackPtr.GetLength(), PROT_READ | PROT_WRITE); + TmpData* data = reinterpret_cast<TmpData*>(that.stackPtr.get()); + uint64_t latency = ProcessTimeStamp_Now() - data->crash_timestamp; + DEBUG_LOG("SEGVHandler latency: %" PRIu64, latency); + /* See bug 886736 for timings on different devices, 150 µs is reasonably above + * the latency on "working" devices and seems to be short enough to not incur + * a huge overhead to on-demand decompression. */ + if (latency <= 150000) that.signalHandlingSlow = false; +} + +/* TODO: "properly" handle signal masks and flags */ +void SEGVHandler::handler(int signum, siginfo_t* info, void* context) { + // ASSERT(signum == SIGSEGV); + DEBUG_LOG("Caught segmentation fault @%p", info->si_addr); + + /* Redispatch to the registered handler */ + SEGVHandler& that = ElfLoader::Singleton; + if (that.action.sa_flags & SA_SIGINFO) { + DEBUG_LOG("Redispatching to registered handler @%p", + FunctionPtr(that.action.sa_sigaction)); + that.action.sa_sigaction(signum, info, context); + } else if (that.action.sa_handler == SIG_DFL) { + DEBUG_LOG("Redispatching to default handler"); + /* Reset the handler to the default one, and trigger it. */ + sys_sigaction(signum, &that.action, nullptr); + raise(signum); + } else if (that.action.sa_handler != SIG_IGN) { + DEBUG_LOG("Redispatching to registered handler @%p", + FunctionPtr(that.action.sa_handler)); + that.action.sa_handler(signum); + } else { + DEBUG_LOG("Ignoring"); + } +} + +int SEGVHandler::__wrap_sigaction(int signum, const struct sigaction* act, + struct sigaction* oldact) { + SEGVHandler& that = ElfLoader::Singleton; + + /* Use system sigaction() function for all but SIGSEGV signals. 
*/ + if (!that.registeredHandler || (signum != SIGSEGV)) + return sys_sigaction(signum, act, oldact); + + if (oldact) *oldact = that.action; + if (act) that.action = *act; + return 0; +} diff --git a/mozglue/linker/ElfLoader.h b/mozglue/linker/ElfLoader.h new file mode 100644 index 0000000000..059c092f6d --- /dev/null +++ b/mozglue/linker/ElfLoader.h @@ -0,0 +1,634 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ElfLoader_h +#define ElfLoader_h + +#include <vector> +#include <dlfcn.h> +#include <signal.h> +#include "mozilla/Atomics.h" +#include "mozilla/RefCounted.h" +#include "mozilla/RefPtr.h" +#include "mozilla/UniquePtr.h" +#include "Zip.h" +#include "Elfxx.h" +#include "Mappable.h" + +/** + * dlfcn.h replacement functions + */ +extern "C" { +void* __wrap_dlopen(const char* path, int flags); +const char* __wrap_dlerror(void); +void* __wrap_dlsym(void* handle, const char* symbol); +int __wrap_dlclose(void* handle); + +#ifndef HAVE_DLADDR +typedef struct { + const char* dli_fname; + void* dli_fbase; + const char* dli_sname; + void* dli_saddr; +} Dl_info; +#endif +int __wrap_dladdr(const void* addr, Dl_info* info); + +struct dl_phdr_info { + Elf::Addr dlpi_addr; + const char* dlpi_name; + const Elf::Phdr* dlpi_phdr; + Elf::Half dlpi_phnum; +}; + +typedef int (*dl_phdr_cb)(struct dl_phdr_info*, size_t, void*); +int __wrap_dl_iterate_phdr(dl_phdr_cb callback, void* data); + +#ifdef __ARM_EABI__ +const void* __wrap___gnu_Unwind_Find_exidx(void* pc, int* pcount); +#endif + +/** + * faulty.lib public API + */ +MFBT_API size_t __dl_get_mappable_length(void* handle); + +MFBT_API void* __dl_mmap(void* handle, void* addr, size_t length, off_t offset); + +MFBT_API void __dl_munmap(void* handle, void* addr, size_t length); + +MFBT_API bool IsSignalHandlingBroken(); +} + +/* Forward declarations for use 
in LibHandle */ +class BaseElf; +class CustomElf; +class SystemElf; + +/** + * Specialize RefCounted template for LibHandle. We may get references to + * LibHandles during the execution of their destructor, so we need + * RefCounted<LibHandle>::Release to support some reentrancy. See further + * below. + */ +class LibHandle; + +namespace mozilla { +namespace detail { + +template <> +inline void RefCounted<LibHandle, AtomicRefCount>::Release() const; + +#ifdef DEBUG +template <> +inline RefCounted<LibHandle, AtomicRefCount>::~RefCounted() { + MOZ_ASSERT(mRefCnt == 0x7fffdead); +} +#endif + +} /* namespace detail */ +} /* namespace mozilla */ + +/** + * Abstract class for loaded libraries. Libraries may be loaded through the + * system linker or this linker, both cases will be derived from this class. + */ +class LibHandle : public mozilla::external::AtomicRefCounted<LibHandle> { + public: + MOZ_DECLARE_REFCOUNTED_TYPENAME(LibHandle) + /** + * Constructor. Takes the path of the loaded library and will store a copy + * of the leaf name. + */ + LibHandle(const char* path) + : directRefCnt(0), + path(path ? strdup(path) : nullptr), + mappable(nullptr) {} + + /** + * Destructor. + */ + virtual ~LibHandle(); + + /** + * Returns the pointer to the address to which the given symbol resolves + * inside the library. It is not supposed to resolve the symbol in other + * libraries, although in practice, it will for system libraries. + */ + virtual void* GetSymbolPtr(const char* symbol) const = 0; + + /** + * Returns whether the given address is part of the virtual address space + * covered by the loaded library. + */ + virtual bool Contains(void* addr) const = 0; + + /** + * Returns the base address of the loaded library. + */ + virtual void* GetBase() const = 0; + + /** + * Returns the file name of the library without the containing directory. + */ + const char* GetName() const; + + /** + * Returns the full path of the library, when available. 
Otherwise, returns + * the file name. + */ + const char* GetPath() const { return path; } + + /** + * Library handles can be referenced from other library handles or + * externally (when dlopen()ing using this linker). We need to be + * able to distinguish between the two kind of referencing for better + * bookkeeping. + */ + void AddDirectRef() { + mozilla::external::AtomicRefCounted<LibHandle>::AddRef(); + ++directRefCnt; + } + + /** + * Releases a direct reference, and returns whether there are any direct + * references left. + */ + bool ReleaseDirectRef() { + const MozRefCountType count = --directRefCnt; + MOZ_ASSERT(count + 1 > 0); + MOZ_ASSERT(count + 1 <= + mozilla::external::AtomicRefCounted<LibHandle>::refCount()); + mozilla::external::AtomicRefCounted<LibHandle>::Release(); + return !!count; + } + + /** + * Returns the number of direct references + */ + MozRefCountType DirectRefCount() { return directRefCnt; } + + /** + * Returns the complete size of the file or stream behind the library + * handle. + */ + size_t GetMappableLength() const; + + /** + * Returns a memory mapping of the file or stream behind the library + * handle. + */ + void* MappableMMap(void* addr, size_t length, off_t offset) const; + + /** + * Unmaps a memory mapping of the file or stream behind the library + * handle. + */ + void MappableMUnmap(void* addr, size_t length) const; + +#ifdef __ARM_EABI__ + /** + * Find the address and entry count of the ARM.exidx section + * associated with the library + */ + virtual const void* FindExidx(int* pcount) const = 0; +#endif + + protected: + /** + * Returns a mappable object for use by MappableMMap and related functions. + */ + virtual Mappable* GetMappable() const = 0; + + /** + * Returns the instance, casted as the wanted type. Returns nullptr if + * that's not the actual type. 
(short of a better way to do this without + * RTTI) + */ + friend class ElfLoader; + friend class CustomElf; + friend class SEGVHandler; + friend int __wrap_dl_iterate_phdr(dl_phdr_cb callback, void* data); + virtual BaseElf* AsBaseElf() { return nullptr; } + virtual SystemElf* AsSystemElf() { return nullptr; } + + private: + mozilla::Atomic<MozRefCountType> directRefCnt; + char* path; + + /* Mappable object keeping the result of GetMappable() */ + mutable RefPtr<Mappable> mappable; +}; + +/** + * Specialized RefCounted<LibHandle>::Release. Under normal operation, when + * mRefCnt reaches 0, the LibHandle is deleted. Its mRefCnt is however + * increased to 1 on normal builds, and 0x7fffdead on debug builds so that the + * LibHandle can still be referenced while the destructor is executing. The + * mRefCnt is allowed to grow > 0x7fffdead, but not to decrease under that + * value, which would mean too many Releases from within the destructor. + */ +namespace mozilla { +namespace detail { + +template <> +inline void RefCounted<LibHandle, AtomicRefCount>::Release() const { +#ifdef DEBUG + if (mRefCnt > 0x7fff0000) MOZ_ASSERT(mRefCnt > 0x7fffdead); +#endif + MOZ_ASSERT(mRefCnt > 0); + if (mRefCnt > 0) { + if (0 == --mRefCnt) { +#ifdef DEBUG + mRefCnt = 0x7fffdead; +#else + ++mRefCnt; +#endif + delete static_cast<const LibHandle*>(this); + } + } +} + +} /* namespace detail */ +} /* namespace mozilla */ + +/** + * Class handling libraries loaded by the system linker + */ +class SystemElf : public LibHandle { + public: + /** + * Returns a new SystemElf for the given path. The given flags are passed + * to dlopen(). 
+ */ + static already_AddRefed<LibHandle> Load(const char* path, int flags); + + /** + * Inherited from LibHandle + */ + virtual ~SystemElf(); + virtual void* GetSymbolPtr(const char* symbol) const; + virtual bool Contains(void* addr) const { return false; /* UNIMPLEMENTED */ } + virtual void* GetBase() const { return nullptr; /* UNIMPLEMENTED */ } + +#ifdef __ARM_EABI__ + virtual const void* FindExidx(int* pcount) const; +#endif + + protected: + virtual Mappable* GetMappable() const; + + /** + * Returns the instance, casted as SystemElf. (short of a better way to do + * this without RTTI) + */ + friend class ElfLoader; + virtual SystemElf* AsSystemElf() { return this; } + + /** + * Remove the reference to the system linker handle. This avoids dlclose() + * being called when the instance is destroyed. + */ + void Forget() { dlhandle = nullptr; } + + private: + /** + * Private constructor + */ + SystemElf(const char* path, void* handle) + : LibHandle(path), dlhandle(handle) {} + + /* Handle as returned by system dlopen() */ + void* dlhandle; +}; + +/** + * The ElfLoader registers its own SIGSEGV handler to handle segmentation + * faults within the address space of the loaded libraries. It however + * allows a handler to be set for faults in other places, and redispatches + * to the handler set through signal() or sigaction(). + */ +class SEGVHandler { + public: + bool hasRegisteredHandler() { + if (!initialized) FinishInitialization(); + return registeredHandler; + } + + bool isSignalHandlingBroken() { return signalHandlingBroken; } + + static int __wrap_sigaction(int signum, const struct sigaction* act, + struct sigaction* oldact); + + protected: + SEGVHandler(); + ~SEGVHandler(); + + private: + /** + * The constructor doesn't do all initialization, and the tail is done + * at a later time. + */ + void FinishInitialization(); + + /** + * SIGSEGV handler registered with __wrap_signal or __wrap_sigaction. 
+ */ + struct sigaction action; + + /** + * ElfLoader SIGSEGV handler. + */ + static void handler(int signum, siginfo_t* info, void* context); + + /** + * Temporary test handler. + */ + static void test_handler(int signum, siginfo_t* info, void* context); + + /** + * Size of the alternative stack. The printf family requires more than 8KB + * of stack, and our signal handler may print a few things. + */ + static const size_t stackSize = 12 * 1024; + + /** + * Alternative stack information used before initialization. + */ + stack_t oldStack; + + /** + * Pointer to an alternative stack for signals. Only set if oldStack is + * not set or not big enough. + */ + MappedPtr stackPtr; + + bool initialized; + bool registeredHandler; + bool signalHandlingBroken; + bool signalHandlingSlow; +}; + +/** + * Elf Loader class in charge of loading and bookkeeping libraries. + */ +class ElfLoader : public SEGVHandler { + public: + /** + * The Elf Loader instance + */ + static ElfLoader Singleton; + + /** + * Loads the given library with the given flags. Equivalent to dlopen() + * The extra "parent" argument optionally gives the handle of the library + * requesting the given library to be loaded. The loader may look in the + * directory containing that parent library for the library to load. + */ + already_AddRefed<LibHandle> Load(const char* path, int flags, + LibHandle* parent = nullptr); + + /** + * Returns the handle of the library containing the given address in + * its virtual address space, i.e. the library handle for which + * LibHandle::Contains returns true. Its purpose is to allow to + * implement dladdr(). + */ + already_AddRefed<LibHandle> GetHandleByPtr(void* addr); + + /** + * Returns a Mappable object for the path. Paths in the form + * /foo/bar/baz/archive!/directory/lib.so + * try to load the directory/lib.so in /foo/bar/baz/archive, provided + * that file is a Zip archive. 
+ */ + static Mappable* GetMappableFromPath(const char* path); + + void ExpectShutdown(bool val) { expect_shutdown = val; } + bool IsShutdownExpected() { return expect_shutdown; } + + private: + bool expect_shutdown; + + protected: + /** + * Registers the given handle. This method is meant to be called by + * LibHandle subclass creators. + */ + void Register(LibHandle* handle); + void Register(CustomElf* handle); + + /** + * Forget about the given handle. This method is meant to be called by + * LibHandle subclass destructors. + */ + void Forget(LibHandle* handle); + void Forget(CustomElf* handle); + + friend class SystemElf; + friend const char* __wrap_dlerror(void); + friend void* __wrap_dlsym(void* handle, const char* symbol); + friend int __wrap_dlclose(void* handle); + /* __wrap_dlerror() returns this custom last error if non-null or the system + * dlerror() value if this is null. Must refer to a string constant. */ + mozilla::Atomic<const char*, mozilla::Relaxed> lastError; + + private: + ElfLoader() : expect_shutdown(true), lastError(nullptr) { + pthread_mutex_init(&handlesMutex, nullptr); + } + + ~ElfLoader(); + + /* Initialization code that can't run during static initialization. */ + void Init(); + + /* System loader handle for the library/program containing our code. This + * is used to resolve wrapped functions. */ + RefPtr<LibHandle> self_elf; + +#if defined(ANDROID) + /* System loader handle for the libc. This is used to resolve weak symbols + * that some libcs contain that the Android linker won't dlsym(). Normally, + * we wouldn't treat non-Android differently, but glibc uses versioned + * symbols which this linker doesn't support. */ + RefPtr<LibHandle> libc; + + /* And for libm. 
*/ + RefPtr<LibHandle> libm; +#endif + + /* Bookkeeping */ + typedef std::vector<LibHandle*> LibHandleList; + LibHandleList handles; + + pthread_mutex_t handlesMutex; + + protected: + friend class CustomElf; + friend class LoadedElf; + + /* Definition of static destructors as to be used for C++ ABI compatibility */ + typedef void (*Destructor)(void* object); + + /** + * C++ ABI makes static initializers register destructors through a specific + * atexit interface. On glibc/linux systems, the dso_handle is a pointer + * within a given library. On bionic/android systems, it is an undefined + * symbol. Making sense of the value is not really important, and all that + * is really important is that it is different for each loaded library, so + * that they can be discriminated when shutting down. For convenience, on + * systems where the dso handle is a symbol, that symbol is resolved to + * point at corresponding CustomElf. + * + * Destructors are registered with __*_atexit with an associated object to + * be passed as argument when it is called. + * + * When __cxa_finalize is called, destructors registered for the given + * DSO handle are called in the reverse order they were registered. + */ +#ifdef __ARM_EABI__ + static int __wrap_aeabi_atexit(void* that, Destructor destructor, + void* dso_handle); +#else + static int __wrap_cxa_atexit(Destructor destructor, void* that, + void* dso_handle); +#endif + + static void __wrap_cxa_finalize(void* dso_handle); + + /** + * Registered destructor. Keeps track of the destructor function pointer, + * associated object to call it with, and DSO handle. + */ + class DestructorCaller { + public: + DestructorCaller(Destructor destructor, void* object, void* dso_handle) + : destructor(destructor), object(object), dso_handle(dso_handle) {} + + /** + * Call the destructor function with the associated object. + * Call only once, see CustomElf::~CustomElf. 
+ */ + void Call(); + + /** + * Returns whether the destructor is associated to the given DSO handle + */ + bool IsForHandle(void* handle) const { return handle == dso_handle; } + + private: + Destructor destructor; + void* object; + void* dso_handle; + }; + + private: + /* Keep track of all registered destructors */ + std::vector<DestructorCaller> destructors; + + /* Forward declaration, see further below */ + class DebuggerHelper; + + public: + /* Loaded object descriptor for the debugger interface below*/ + struct link_map { + /* Base address of the loaded object. */ + const void* l_addr; + /* File name */ + const char* l_name; + /* Address of the PT_DYNAMIC segment. */ + const void* l_ld; + + private: + friend class ElfLoader::DebuggerHelper; + /* Double linked list of loaded objects. */ + link_map *l_next, *l_prev; + }; + + private: + /* Data structure used by the linker to give details about shared objects it + * loaded to debuggers. This is normally defined in link.h, but Android + * headers lack this file. */ + struct r_debug { + /* Version number of the protocol. */ + int r_version; + + /* Head of the linked list of loaded objects. */ + link_map* r_map; + + /* Function to be called when updates to the linked list of loaded objects + * are going to occur. The function is to be called before and after + * changes. */ + void (*r_brk)(void); + + /* Indicates to the debugger what state the linked list of loaded objects + * is in when the function above is called. 
*/ + enum { + RT_CONSISTENT, /* Changes are complete */ + RT_ADD, /* Beginning to add a new object */ + RT_DELETE /* Beginning to remove an object */ + } r_state; + }; + + /* Memory representation of ELF Auxiliary Vectors */ + struct AuxVector { + Elf::Addr type; + Elf::Addr value; + }; + + /* Helper class used to integrate libraries loaded by this linker in + * r_debug */ + class DebuggerHelper { + public: + DebuggerHelper(); + + void Init(AuxVector* auvx); + + explicit operator bool() { return dbg; } + + /* Make the debugger aware of a new loaded object */ + void Add(link_map* map); + + /* Make the debugger aware of the unloading of an object */ + void Remove(link_map* map); + + /* Iterates over all link_maps */ + class iterator { + public: + const link_map* operator->() const { return item; } + + const link_map& operator++() { + item = item->l_next; + return *item; + } + + bool operator<(const iterator& other) const { + if (other.item == nullptr) return item ? true : false; + MOZ_CRASH( + "DebuggerHelper::iterator::operator< called with something else " + "than DebuggerHelper::end()"); + } + + protected: + friend class DebuggerHelper; + explicit iterator(const link_map* item) : item(item) {} + + private: + const link_map* item; + }; + + iterator begin() const { return iterator(dbg ? dbg->r_map : nullptr); } + + iterator end() const { return iterator(nullptr); } + + private: + r_debug* dbg; + link_map* firstAdded; + }; + friend int __wrap_dl_iterate_phdr(dl_phdr_cb callback, void* data); + DebuggerHelper dbg; +}; + +#endif /* ElfLoader_h */ diff --git a/mozglue/linker/Elfxx.h b/mozglue/linker/Elfxx.h new file mode 100644 index 0000000000..4baf923b55 --- /dev/null +++ b/mozglue/linker/Elfxx.h @@ -0,0 +1,246 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef Elfxx_h +#define Elfxx_h + +#include "Utils.h" + +/** + * Android system headers have two different elf.h file. The one under linux/ + * is the most complete on older Android API versions without unified headers. + */ +#if defined(ANDROID) && __ANDROID_API__ < 21 && !defined(__ANDROID_API_L__) +# include <linux/elf.h> +#else +# include <elf.h> +#endif +#include <endian.h> + +#if defined(__ARM_EABI__) && !defined(PT_ARM_EXIDX) +# define PT_ARM_EXIDX 0x70000001 +#endif + +/** + * Generic ELF macros for the target system + */ +#ifdef __LP64__ +# define Elf_(type) Elf64_##type +# define ELFCLASS ELFCLASS64 +# define ELF_R_TYPE ELF64_R_TYPE +# define ELF_R_SYM ELF64_R_SYM +# ifndef ELF_ST_BIND +# define ELF_ST_BIND ELF64_ST_BIND +# endif +#else +# define Elf_(type) Elf32_##type +# define ELFCLASS ELFCLASS32 +# define ELF_R_TYPE ELF32_R_TYPE +# define ELF_R_SYM ELF32_R_SYM +# ifndef ELF_ST_BIND +# define ELF_ST_BIND ELF32_ST_BIND +# endif +#endif + +#ifndef __BYTE_ORDER +# error Cannot find endianness +#endif + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ELFDATA ELFDATA2LSB +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ELFDATA ELFDATA2MSB +#endif + +#ifdef __linux__ +# define ELFOSABI ELFOSABI_LINUX +# ifdef EI_ABIVERSION +# define ELFABIVERSION 0 +# endif +#else +# error Unknown ELF OSABI +#endif + +#if defined(__i386__) +# define ELFMACHINE EM_386 + +// Doing this way probably doesn't scale to other architectures +# define R_ABS R_386_32 +# define R_GLOB_DAT R_386_GLOB_DAT +# define R_JMP_SLOT R_386_JMP_SLOT +# define R_RELATIVE R_386_RELATIVE +# define RELOC(n) DT_REL##n +# define UNSUPPORTED_RELOC(n) DT_RELA##n +# define STR_RELOC(n) "DT_REL" #n +# define Reloc Rel + +#elif defined(__x86_64__) +# define ELFMACHINE EM_X86_64 + +# define R_ABS R_X86_64_64 +# define R_GLOB_DAT R_X86_64_GLOB_DAT +# define R_JMP_SLOT R_X86_64_JUMP_SLOT +# define R_RELATIVE R_X86_64_RELATIVE +# define RELOC(n) DT_RELA##n +# define UNSUPPORTED_RELOC(n) DT_REL##n +# define 
STR_RELOC(n) "DT_RELA" #n +# define Reloc Rela + +#elif defined(__arm__) +# define ELFMACHINE EM_ARM + +# ifndef R_ARM_ABS32 +# define R_ARM_ABS32 2 +# endif +# ifndef R_ARM_GLOB_DAT +# define R_ARM_GLOB_DAT 21 +# endif +# ifndef R_ARM_JUMP_SLOT +# define R_ARM_JUMP_SLOT 22 +# endif +# ifndef R_ARM_RELATIVE +# define R_ARM_RELATIVE 23 +# endif + +# define R_ABS R_ARM_ABS32 +# define R_GLOB_DAT R_ARM_GLOB_DAT +# define R_JMP_SLOT R_ARM_JUMP_SLOT +# define R_RELATIVE R_ARM_RELATIVE +# define RELOC(n) DT_REL##n +# define UNSUPPORTED_RELOC(n) DT_RELA##n +# define STR_RELOC(n) "DT_REL" #n +# define Reloc Rel + +#elif defined(__aarch64__) +# define ELFMACHINE EM_AARCH64 + +# define R_ABS R_AARCH64_ABS64 +# define R_GLOB_DAT R_AARCH64_GLOB_DAT +# define R_JMP_SLOT R_AARCH64_JUMP_SLOT +# define R_RELATIVE R_AARCH64_RELATIVE +# define RELOC(n) DT_RELA##n +# define UNSUPPORTED_RELOC(n) DT_REL##n +# define STR_RELOC(n) "DT_RELA" #n +# define Reloc Rela + +#else +# error Unknown ELF machine type +#endif + +/** + * Android system headers don't have all definitions + */ +#ifndef STN_UNDEF +# define STN_UNDEF 0 +#endif +#ifndef DT_INIT_ARRAY +# define DT_INIT_ARRAY 25 +#endif +#ifndef DT_FINI_ARRAY +# define DT_FINI_ARRAY 26 +#endif +#ifndef DT_INIT_ARRAYSZ +# define DT_INIT_ARRAYSZ 27 +#endif +#ifndef DT_FINI_ARRAYSZ +# define DT_FINI_ARRAYSZ 28 +#endif +#ifndef DT_RELACOUNT +# define DT_RELACOUNT 0x6ffffff9 +#endif +#ifndef DT_RELCOUNT +# define DT_RELCOUNT 0x6ffffffa +#endif +#ifndef DT_VERSYM +# define DT_VERSYM 0x6ffffff0 +#endif +#ifndef DT_VERDEF +# define DT_VERDEF 0x6ffffffc +#endif +#ifndef DT_VERDEFNUM +# define DT_VERDEFNUM 0x6ffffffd +#endif +#ifndef DT_VERNEED +# define DT_VERNEED 0x6ffffffe +#endif +#ifndef DT_VERNEEDNUM +# define DT_VERNEEDNUM 0x6fffffff +#endif +#ifndef DT_FLAGS_1 +# define DT_FLAGS_1 0x6ffffffb +#endif +#ifndef DT_FLAGS +# define DT_FLAGS 30 +#endif +#ifndef DF_SYMBOLIC +# define DF_SYMBOLIC 0x00000002 +#endif +#ifndef DF_TEXTREL +# define 
DF_TEXTREL 0x00000004 +#endif + +namespace Elf { + +/** + * Define a few basic Elf Types + */ +typedef Elf_(Phdr) Phdr; +typedef Elf_(Dyn) Dyn; +typedef Elf_(Sym) Sym; +typedef Elf_(Addr) Addr; +typedef Elf_(Word) Word; +typedef Elf_(Half) Half; + +/** + * Helper class around the standard Elf header struct + */ +struct Ehdr : public Elf_(Ehdr) { + /** + * Equivalent to reinterpret_cast<const Ehdr *>(buf), but additionally + * checking that this is indeed an Elf header and that the Elf type + * corresponds to that of the system + */ + static const Ehdr* validate(const void* buf); +}; + +/** + * Elf String table + */ +class Strtab : public UnsizedArray<const char> { + public: + /** + * Returns the string at the given index in the table + */ + const char* GetStringAt(off_t index) const { + return &UnsizedArray<const char>::operator[](index); + } +}; + +/** + * Helper class around Elf relocation. + */ +struct Rel : public Elf_(Rel) { + /** + * Returns the addend for the relocation, which is the value stored + * at r_offset. + */ + Addr GetAddend(void* base) const { + return *(reinterpret_cast<const Addr*>(reinterpret_cast<const char*>(base) + + r_offset)); + } +}; + +/** + * Helper class around Elf relocation with addend. + */ +struct Rela : public Elf_(Rela) { + /** + * Returns the addend for the relocation. + */ + Addr GetAddend(void* base) const { return r_addend; } +}; + +} /* namespace Elf */ + +#endif /* Elfxx_h */ diff --git a/mozglue/linker/Linker.h b/mozglue/linker/Linker.h new file mode 100644 index 0000000000..77ddb06ecc --- /dev/null +++ b/mozglue/linker/Linker.h @@ -0,0 +1,24 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef Linker_h +#define Linker_h + +#ifdef MOZ_LINKER +# include "ElfLoader.h" +# define __wrap_sigaction SEGVHandler::__wrap_sigaction +#else +# include <dlfcn.h> +# include <link.h> +# include <signal.h> +# define __wrap_sigaction sigaction +# define __wrap_dlopen dlopen +# define __wrap_dlerror dlerror +# define __wrap_dlsym dlsym +# define __wrap_dlclose dlclose +# define __wrap_dladdr dladdr +# define __wrap_dl_iterate_phdr dl_iterate_phdr +#endif + +#endif diff --git a/mozglue/linker/Logging.cpp b/mozglue/linker/Logging.cpp new file mode 100644 index 0000000000..e61c7835d2 --- /dev/null +++ b/mozglue/linker/Logging.cpp @@ -0,0 +1,7 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "Logging.h" + +Logging Logging::Singleton; diff --git a/mozglue/linker/Logging.h b/mozglue/linker/Logging.h new file mode 100644 index 0000000000..1e66ea41de --- /dev/null +++ b/mozglue/linker/Logging.h @@ -0,0 +1,72 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef Logging_h +#define Logging_h + +#include <cstdlib> +#include "mozilla/Likely.h" +#include "mozilla/MacroArgs.h" + +#ifdef ANDROID +# include <android/log.h> +# define LOG(...) \ + __android_log_print(ANDROID_LOG_INFO, "GeckoLinker", __VA_ARGS__) +# define WARN(...) \ + __android_log_print(ANDROID_LOG_WARN, "GeckoLinker", __VA_ARGS__) +# define ERROR(...) \ + __android_log_print(ANDROID_LOG_ERROR, "GeckoLinker", __VA_ARGS__) +#else +# include <cstdio> + +/* Expand to 1 or m depending on whether there is one argument or more + * given. */ +# define MOZ_ONE_OR_MORE_ARGS_IMPL2(_1, _2, _3, _4, _5, _6, _7, _8, _9, N, \ + ...) 
\ + N +# define MOZ_ONE_OR_MORE_ARGS_IMPL(args) MOZ_ONE_OR_MORE_ARGS_IMPL2 args +# define MOZ_ONE_OR_MORE_ARGS(...) \ + MOZ_ONE_OR_MORE_ARGS_IMPL((__VA_ARGS__, m, m, m, m, m, m, m, m, 1, 0)) + +# define MOZ_MACRO_GLUE(a, b) a b + +/* Some magic to choose between LOG1 and LOGm depending on the number of + * arguments */ +# define MOZ_CHOOSE_LOG(...) \ + MOZ_MACRO_GLUE(MOZ_CONCAT(LOG, MOZ_ONE_OR_MORE_ARGS(__VA_ARGS__)), \ + (__VA_ARGS__)) + +# define LOG1(format) fprintf(stderr, format "\n") +# define LOGm(format, ...) fprintf(stderr, format "\n", __VA_ARGS__) +# define LOG(...) MOZ_CHOOSE_LOG(__VA_ARGS__) +# define WARN(...) MOZ_CHOOSE_LOG("Warning: " __VA_ARGS__) +# define ERROR(...) MOZ_CHOOSE_LOG("Error: " __VA_ARGS__) + +#endif + +class Logging { + public: + static bool isVerbose() { return Singleton.verbose; } + + private: + bool verbose; + + public: + static void Init() { + const char* env = getenv("MOZ_DEBUG_LINKER"); + if (env && *env == '1') Singleton.verbose = true; + } + + private: + static Logging Singleton; +}; + +#define DEBUG_LOG(...) \ + do { \ + if (MOZ_UNLIKELY(Logging::isVerbose())) { \ + LOG(__VA_ARGS__); \ + } \ + } while (0) + +#endif /* Logging_h */ diff --git a/mozglue/linker/Mappable.cpp b/mozglue/linker/Mappable.cpp new file mode 100644 index 0000000000..cacd6a46f6 --- /dev/null +++ b/mozglue/linker/Mappable.cpp @@ -0,0 +1,376 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include <fcntl.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <cstring> +#include <cstdlib> +#include <cstdio> +#include <string> + +#include "Mappable.h" + +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/UniquePtr.h" + +#ifdef ANDROID +# include "mozilla/Ashmem.h" +#endif +#include <sys/stat.h> +#include <errno.h> +#include "ElfLoader.h" +#include "XZStream.h" +#include "Logging.h" + +using mozilla::MakeUnique; +using mozilla::UniquePtr; + +class CacheValidator { + public: + CacheValidator(const char* aCachedLibPath, Zip* aZip, Zip::Stream* aStream) + : mCachedLibPath(aCachedLibPath) { + static const char kChecksumSuffix[] = ".crc"; + + mCachedChecksumPath = + MakeUnique<char[]>(strlen(aCachedLibPath) + sizeof(kChecksumSuffix)); + sprintf(mCachedChecksumPath.get(), "%s%s", aCachedLibPath, kChecksumSuffix); + DEBUG_LOG("mCachedChecksumPath: %s", mCachedChecksumPath.get()); + + mChecksum = aStream->GetCRC32(); + DEBUG_LOG("mChecksum: %x", mChecksum); + } + + // Returns whether the cache is valid and up-to-date. + bool IsValid() const { + // Validate based on checksum. + RefPtr<Mappable> checksumMap = + MappableFile::Create(mCachedChecksumPath.get()); + if (!checksumMap) { + // Force caching if checksum is missing in cache. + return false; + } + + DEBUG_LOG("Comparing %x with %s", mChecksum, mCachedChecksumPath.get()); + MappedPtr checksumBuf = checksumMap->mmap(nullptr, checksumMap->GetLength(), + PROT_READ, MAP_PRIVATE, 0); + if (checksumBuf == MAP_FAILED) { + WARN("Couldn't map %s to validate checksum", mCachedChecksumPath.get()); + return false; + } + if (memcmp(checksumBuf, &mChecksum, sizeof(mChecksum))) { + return false; + } + return !access(mCachedLibPath.c_str(), R_OK); + } + + // Caches the APK-provided checksum used in future cache validations. 
+ void CacheChecksum() const { + AutoCloseFD fd(open(mCachedChecksumPath.get(), + O_TRUNC | O_RDWR | O_CREAT | O_NOATIME, + S_IRUSR | S_IWUSR)); + if (fd == -1) { + WARN("Couldn't open %s to update checksum", mCachedChecksumPath.get()); + return; + } + + DEBUG_LOG("Updating checksum %s", mCachedChecksumPath.get()); + + const size_t size = sizeof(mChecksum); + size_t written = 0; + while (written < size) { + ssize_t ret = + write(fd, reinterpret_cast<const uint8_t*>(&mChecksum) + written, + size - written); + if (ret >= 0) { + written += ret; + } else if (errno != EINTR) { + WARN("Writing checksum %s failed with errno %d", + mCachedChecksumPath.get(), errno); + break; + } + } + } + + private: + const std::string mCachedLibPath; + UniquePtr<char[]> mCachedChecksumPath; + uint32_t mChecksum; +}; + +Mappable* MappableFile::Create(const char* path) { + int fd = open(path, O_RDONLY); + if (fd != -1) return new MappableFile(fd); + return nullptr; +} + +MemoryRange MappableFile::mmap(const void* addr, size_t length, int prot, + int flags, off_t offset) { + MOZ_ASSERT(fd != -1); + MOZ_ASSERT(!(flags & MAP_SHARED)); + flags |= MAP_PRIVATE; + + return MemoryRange::mmap(const_cast<void*>(addr), length, prot, flags, fd, + offset); +} + +void MappableFile::finalize() { + /* Close file ; equivalent to close(fd.forget()) */ + fd = -1; +} + +size_t MappableFile::GetLength() const { + struct stat st; + return fstat(fd, &st) ? 0 : st.st_size; +} + +Mappable* MappableExtractFile::Create(const char* name, Zip* zip, + Zip::Stream* stream) { + MOZ_ASSERT(zip && stream); + + const char* cachePath = getenv("MOZ_LINKER_CACHE"); + if (!cachePath || !*cachePath) { + WARN( + "MOZ_LINKER_EXTRACT is set, but not MOZ_LINKER_CACHE; " + "not extracting"); + return nullptr; + } + + // Ensure that the cache dir is private. 
+ chmod(cachePath, 0770); + + UniquePtr<char[]> path = + MakeUnique<char[]>(strlen(cachePath) + strlen(name) + 2); + sprintf(path.get(), "%s/%s", cachePath, name); + + CacheValidator validator(path.get(), zip, stream); + if (validator.IsValid()) { + DEBUG_LOG("Reusing %s", static_cast<char*>(path.get())); + return MappableFile::Create(path.get()); + } + DEBUG_LOG("Extracting to %s", static_cast<char*>(path.get())); + AutoCloseFD fd; + fd = open(path.get(), O_TRUNC | O_RDWR | O_CREAT | O_NOATIME, + S_IRUSR | S_IWUSR); + if (fd == -1) { + ERROR("Couldn't open %s to decompress library", path.get()); + return nullptr; + } + AutoUnlinkFile file(path.release()); + if (stream->GetType() == Zip::Stream::DEFLATE) { + if (ftruncate(fd, stream->GetUncompressedSize()) == -1) { + ERROR("Couldn't ftruncate %s to decompress library", file.get()); + return nullptr; + } + /* Map the temporary file for use as inflate buffer */ + MappedPtr buffer(MemoryRange::mmap(nullptr, stream->GetUncompressedSize(), + PROT_WRITE, MAP_SHARED, fd, 0)); + if (buffer == MAP_FAILED) { + ERROR("Couldn't map %s to decompress library", file.get()); + return nullptr; + } + + z_stream zStream = stream->GetZStream(buffer); + + /* Decompress */ + if (inflateInit2(&zStream, -MAX_WBITS) != Z_OK) { + ERROR("inflateInit failed: %s", zStream.msg); + return nullptr; + } + if (inflate(&zStream, Z_FINISH) != Z_STREAM_END) { + ERROR("inflate failed: %s", zStream.msg); + return nullptr; + } + if (inflateEnd(&zStream) != Z_OK) { + ERROR("inflateEnd failed: %s", zStream.msg); + return nullptr; + } + if (zStream.total_out != stream->GetUncompressedSize()) { + ERROR("File not fully uncompressed! 
%ld / %d", zStream.total_out, + static_cast<unsigned int>(stream->GetUncompressedSize())); + return nullptr; + } + } else if (XZStream::IsXZ(stream->GetBuffer(), stream->GetSize())) { + XZStream xzStream(stream->GetBuffer(), stream->GetSize()); + + if (!xzStream.Init()) { + ERROR("Couldn't initialize XZ decoder"); + return nullptr; + } + DEBUG_LOG("XZStream created, compressed=%" PRIuPTR + ", uncompressed=%" PRIuPTR, + xzStream.Size(), xzStream.UncompressedSize()); + + if (ftruncate(fd, xzStream.UncompressedSize()) == -1) { + ERROR("Couldn't ftruncate %s to decompress library", file.get()); + return nullptr; + } + MappedPtr buffer(MemoryRange::mmap(nullptr, xzStream.UncompressedSize(), + PROT_WRITE, MAP_SHARED, fd, 0)); + if (buffer == MAP_FAILED) { + ERROR("Couldn't map %s to decompress library", file.get()); + return nullptr; + } + const size_t written = xzStream.Decode(buffer, buffer.GetLength()); + DEBUG_LOG("XZStream decoded %" PRIuPTR, written); + if (written != buffer.GetLength()) { + ERROR("Error decoding XZ file %s", file.get()); + return nullptr; + } + } else { + return nullptr; + } + + validator.CacheChecksum(); + return new MappableExtractFile(fd.forget(), file.release()); +} + +/** + * _MappableBuffer is a buffer which content can be mapped at different + * locations in the virtual address space. + * On Linux, uses a (deleted) temporary file on a tmpfs for sharable content. + * On Android, uses ashmem. + */ +class _MappableBuffer : public MappedPtr { + public: + /** + * Returns a _MappableBuffer instance with the given name and the given + * length. 
+ */ + /* Returns nullptr if the backing file or ashmem region cannot be created, + * sized or mapped. */ + static _MappableBuffer* Create(const char* name, size_t length) { + AutoCloseFD fd; + const char* ident; +#ifdef ANDROID + /* On Android, initialize an ashmem region with the given length */ + fd = mozilla::android::ashmem_create(name, length); + ident = name; +#else + /* On Linux, use /dev/shm as base directory for temporary files, assuming + * it's on tmpfs */ + /* TODO: check that /dev/shm is tmpfs */ + char path[256]; + /* name is caller-supplied: bound the write and refuse names that do not + * fit instead of overflowing path. */ + const int pathLen = + snprintf(path, sizeof(path), "/dev/shm/%s.XXXXXX", name); + if (pathLen < 0 || static_cast<size_t>(pathLen) >= sizeof(path)) + return nullptr; + fd = mkstemp(path); + if (fd == -1) return nullptr; + unlink(path); + /* The mapping below is only usable if the file really has the requested + * size; fd is closed by AutoCloseFD on the early return. */ + if (ftruncate(fd, length) == -1) return nullptr; + ident = path; +#endif + + void* buf = + ::mmap(nullptr, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (buf != MAP_FAILED) { + DEBUG_LOG("Decompression buffer of size 0x%" PRIxPTR + " in " +#ifdef ANDROID + "ashmem " +#endif + "\"%s\", mapped @%p", + length, ident, buf); + return new _MappableBuffer(fd.forget(), buf, length); + } + return nullptr; + } + + /* Maps part of the buffer's backing fd at addr; arguments are those of the + * system mmap() minus the file descriptor. */ + void* mmap(const void* addr, size_t length, int prot, int flags, + off_t offset) { + MOZ_ASSERT(fd != -1); +#ifdef ANDROID + /* Mapping ashmem MAP_PRIVATE is like mapping anonymous memory, even when + * there is content in the ashmem */ + if (flags & MAP_PRIVATE) { + flags &= ~MAP_PRIVATE; + flags |= MAP_SHARED; + } +#endif + return ::mmap(const_cast<void*>(addr), length, prot, flags, fd, offset); + } + + private: + _MappableBuffer(int fd, void* buf, size_t length) + : MappedPtr(buf, length), fd(fd) {} + + /* File descriptor for the temporary file or ashmem */ + AutoCloseFD fd; +}; + +/* Returns nullptr when the decompression buffer cannot be created. */ +Mappable* MappableDeflate::Create(const char* name, Zip* zip, + Zip::Stream* stream) { + MOZ_ASSERT(stream->GetType() == Zip::Stream::DEFLATE); + _MappableBuffer* buf = + _MappableBuffer::Create(name, stream->GetUncompressedSize()); + if (buf) return new MappableDeflate(buf, zip, stream); + return nullptr; +} + +MappableDeflate::MappableDeflate(_MappableBuffer* buf, Zip* zip, + Zip::Stream* stream) + : zip(zip), buffer(buf), zStream(stream->GetZStream(*buf)) 
{} + +MappableDeflate::~MappableDeflate() {} + +MemoryRange MappableDeflate::mmap(const void* addr, size_t length, int prot, + int flags, off_t offset) { + MOZ_ASSERT(buffer); + MOZ_ASSERT(!(flags & MAP_SHARED)); + flags |= MAP_PRIVATE; + + /* The deflate stream is uncompressed up to the required offset + length, if + * it hasn't previously been uncompressed */ + ssize_t missing = offset + length + zStream.avail_out - buffer->GetLength(); + if (missing > 0) { + uInt avail_out = zStream.avail_out; + zStream.avail_out = missing; + if ((*buffer == zStream.next_out) && + (inflateInit2(&zStream, -MAX_WBITS) != Z_OK)) { + ERROR("inflateInit failed: %s", zStream.msg); + return MemoryRange(MAP_FAILED, 0); + } + int ret = inflate(&zStream, Z_SYNC_FLUSH); + if (ret < 0) { + ERROR("inflate failed: %s", zStream.msg); + return MemoryRange(MAP_FAILED, 0); + } + if (ret == Z_NEED_DICT) { + ERROR("zstream requires a dictionary. %s", zStream.msg); + return MemoryRange(MAP_FAILED, 0); + } + zStream.avail_out = avail_out - missing + zStream.avail_out; + if (ret == Z_STREAM_END) { + if (inflateEnd(&zStream) != Z_OK) { + ERROR("inflateEnd failed: %s", zStream.msg); + return MemoryRange(MAP_FAILED, 0); + } + if (zStream.total_out != buffer->GetLength()) { + ERROR("File not fully uncompressed! %ld / %d", zStream.total_out, + static_cast<unsigned int>(buffer->GetLength())); + return MemoryRange(MAP_FAILED, 0); + } + } + } +#if defined(ANDROID) && defined(__arm__) + if (prot & PROT_EXEC) { + /* We just extracted data that may be executed in the future. + * We thus need to ensure Instruction and Data cache coherency. 
*/ + DEBUG_LOG("cacheflush(%p, %p)", *buffer + offset, + *buffer + (offset + length)); + cacheflush(reinterpret_cast<uintptr_t>(*buffer + offset), + reinterpret_cast<uintptr_t>(*buffer + (offset + length)), 0); + } +#endif + + return MemoryRange(buffer->mmap(addr, length, prot, flags, offset), length); +} + +void MappableDeflate::finalize() { + /* Free zlib internal buffers */ + inflateEnd(&zStream); + /* Free decompression buffer */ + buffer = nullptr; + /* Remove reference to Zip archive */ + zip = nullptr; +} + +size_t MappableDeflate::GetLength() const { return buffer->GetLength(); } diff --git a/mozglue/linker/Mappable.h b/mozglue/linker/Mappable.h new file mode 100644 index 0000000000..8468aaaccb --- /dev/null +++ b/mozglue/linker/Mappable.h @@ -0,0 +1,161 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef Mappable_h +#define Mappable_h + +#include "Zip.h" +#include "mozilla/RefPtr.h" +#include "mozilla/UniquePtr.h" +#include "zlib.h" + +/** + * Abstract class to handle mmap()ing from various kind of entities, such as + * plain files or Zip entries. The virtual members are meant to act as the + * equivalent system functions, except mapped memory is always MAP_PRIVATE, + * even though a given implementation may use something different internally. 
+ */ +class Mappable : public mozilla::RefCounted<Mappable> { + public: + MOZ_DECLARE_REFCOUNTED_TYPENAME(Mappable) + virtual ~Mappable() {} + + /* Takes the arguments of the system mmap() minus the file descriptor. NOTE(review): the implementations visible in Mappable.cpp assert that MAP_SHARED is not set and add MAP_PRIVATE. */ virtual MemoryRange mmap(const void* addr, size_t length, int prot, int flags, + off_t offset) = 0; + + /* Tag identifying the concrete implementation, as returned by GetKind(). */ enum Kind { + MAPPABLE_FILE, + MAPPABLE_EXTRACT_FILE, + MAPPABLE_DEFLATE, + MAPPABLE_SEEKABLE_ZSTREAM + }; + + virtual Kind GetKind() const = 0; + + private: + virtual void munmap(void* addr, size_t length) { ::munmap(addr, length); } + /* Limit use of Mappable::munmap to classes that keep track of the address + * and size of the mapping. This allows ignoring the ::munmap return value. */ + friend class Mappable1stPagePtr; + friend class LibHandle; + + public: + /** + * Indicate to a Mappable instance that no further mmap is going to happen. + */ + virtual void finalize() = 0; + + /** + * Returns the maximum length that can be mapped from this Mappable for + * offset = 0. + */ + virtual size_t GetLength() const = 0; +}; + +/** + * Mappable implementation for plain files + */ +class MappableFile : public Mappable { + public: + ~MappableFile() {} + + /** + * Create a MappableFile instance for the given file path. + * Returns nullptr if the file cannot be opened for reading. + */ + static Mappable* Create(const char* path); + + /* Inherited from Mappable */ + virtual MemoryRange mmap(const void* addr, size_t length, int prot, int flags, + off_t offset); + virtual void finalize(); + virtual size_t GetLength() const; + + virtual Kind GetKind() const { return MAPPABLE_FILE; }; + + protected: + /* Wraps an already-open descriptor, which is then held by the AutoCloseFD member. */ explicit MappableFile(int fd) : fd(fd) {} + + private: + /* File descriptor */ + AutoCloseFD fd; +}; + +/** + * Mappable implementation for deflated stream in a Zip archive. + * Inflates the complete stream into a cache file. + */ +class MappableExtractFile : public MappableFile { + public: + ~MappableExtractFile() = default; + + /** + * Create a MappableExtractFile instance for the given Zip stream. The name + * argument is used to create the cache file in the cache directory. 
+ */ + static Mappable* Create(const char* name, Zip* zip, Zip::Stream* stream); + + /* Override finalize from MappableFile */ + virtual void finalize() {} + + virtual Kind GetKind() const { return MAPPABLE_EXTRACT_FILE; }; + + private: + /** + * AutoUnlinkFile keeps track of a file name and removes (unlinks) the file + * when the instance is destroyed. + */ + struct UnlinkFile { + void operator()(char* value) { + unlink(value); + delete[] value; + } + }; + typedef mozilla::UniquePtr<char[], UnlinkFile> AutoUnlinkFile; + + MappableExtractFile(int fd, const char* path) + : MappableFile(fd), path(path) {} + + /* Extracted file path */ + mozilla::UniquePtr<const char[]> path; +}; + +class _MappableBuffer; + +/** + * Mappable implementation for deflated stream in a Zip archive. + * Inflates the mapped bits in a temporary buffer. + */ +class MappableDeflate : public Mappable { + public: + ~MappableDeflate(); + + /** + * Create a MappableDeflate instance for the given Zip stream. The name + * argument is used for an appropriately named temporary file, and the Zip + * instance is given for the MappableDeflate to keep a reference of it. 
+ */ + static Mappable* Create(const char* name, Zip* zip, Zip::Stream* stream); + + /* Inherited from Mappable */ + virtual MemoryRange mmap(const void* addr, size_t length, int prot, int flags, + off_t offset); + virtual void finalize(); + virtual size_t GetLength() const; + + virtual Kind GetKind() const { return MAPPABLE_DEFLATE; }; + + private: + MappableDeflate(_MappableBuffer* buf, Zip* zip, Zip::Stream* stream); + + /* Zip reference */ + RefPtr<Zip> zip; + + /* Decompression buffer */ + mozilla::UniquePtr<_MappableBuffer> buffer; + + /* Zlib data */ + z_stream zStream; +}; + +#endif /* Mappable_h */ diff --git a/mozglue/linker/Utils.h b/mozglue/linker/Utils.h new file mode 100644 index 0000000000..d3827f1f41 --- /dev/null +++ b/mozglue/linker/Utils.h @@ -0,0 +1,532 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef Utils_h +#define Utils_h + +#include <pthread.h> +#include <stdint.h> +#include <stddef.h> +#include <sys/mman.h> +#include <unistd.h> +#include "mozilla/Assertions.h" +#include "mozilla/Atomics.h" +#include "mozilla/Scoped.h" + +/** + * On architectures that are little endian and that support unaligned reads, + * we can use direct type, but on others, we want to have a special class + * to handle conversion and alignment issues. + */ +#if !defined(DEBUG) && (defined(__i386__) || defined(__x86_64__)) +typedef uint16_t le_uint16; +typedef uint32_t le_uint32; +#else + +/** + * Template that allows to find an unsigned int type from a (computed) bit size + */ +template <int s> +struct UInt {}; +template <> +struct UInt<16> { + typedef uint16_t Type; +}; +template <> +struct UInt<32> { + typedef uint32_t Type; +}; + +/** + * Template to access 2 n-bit sized words as a 2*n-bit sized word, doing + * conversion from little endian and avoiding alignment issues. 
+ */ +template <typename T> +class le_to_cpu { + public: + typedef typename UInt<16 * sizeof(T)>::Type Type; + + operator Type() const { return (b << (sizeof(T) * 8)) | a; } + + const le_to_cpu& operator=(const Type& v) { + a = v & ((1 << (sizeof(T) * 8)) - 1); + b = v >> (sizeof(T) * 8); + return *this; + } + + le_to_cpu() {} + explicit le_to_cpu(const Type& v) { operator=(v); } + + const le_to_cpu& operator+=(const Type& v) { + return operator=(operator Type() + v); + } + + const le_to_cpu& operator++(int) { return operator=(operator Type() + 1); } + + private: + T a, b; +}; + +/** + * Type definitions + */ +typedef le_to_cpu<unsigned char> le_uint16; +typedef le_to_cpu<le_uint16> le_uint32; +#endif + +/** + * AutoCloseFD is a RAII wrapper for POSIX file descriptors + */ +struct AutoCloseFDTraits { + typedef int type; + static int empty() { return -1; } + static void release(int fd) { + if (fd != -1) close(fd); + } +}; +typedef mozilla::Scoped<AutoCloseFDTraits> AutoCloseFD; + +/** + * AutoCloseFILE is a RAII wrapper for POSIX streams + */ +struct AutoCloseFILETraits { + typedef FILE* type; + static FILE* empty() { return nullptr; } + static void release(FILE* f) { + if (f) fclose(f); + } +}; +typedef mozilla::Scoped<AutoCloseFILETraits> AutoCloseFILE; + +extern mozilla::Atomic<size_t, mozilla::ReleaseAcquire> gPageSize; + +/** + * Page alignment helpers + */ +static size_t PageSize() { + if (!gPageSize) { + gPageSize = sysconf(_SC_PAGESIZE); + } + + return gPageSize; +} + +static inline uintptr_t AlignedPtr(uintptr_t ptr, size_t alignment) { + return ptr & ~(alignment - 1); +} + +template <typename T> +static inline T* AlignedPtr(T* ptr, size_t alignment) { + return reinterpret_cast<T*>( + AlignedPtr(reinterpret_cast<uintptr_t>(ptr), alignment)); +} + +template <typename T> +static inline T PageAlignedPtr(T ptr) { + return AlignedPtr(ptr, PageSize()); +} + +static inline uintptr_t AlignedEndPtr(uintptr_t ptr, size_t alignment) { + return AlignedPtr(ptr + 
alignment - 1, alignment); +} + +template <typename T> +static inline T* AlignedEndPtr(T* ptr, size_t alignment) { + return reinterpret_cast<T*>( + AlignedEndPtr(reinterpret_cast<uintptr_t>(ptr), alignment)); +} + +template <typename T> +static inline T PageAlignedEndPtr(T ptr) { + return AlignedEndPtr(ptr, PageSize()); +} + +static inline size_t AlignedSize(size_t size, size_t alignment) { + return (size + alignment - 1) & ~(alignment - 1); +} + +static inline size_t PageAlignedSize(size_t size) { + return AlignedSize(size, PageSize()); +} + +static inline bool IsAlignedPtr(uintptr_t ptr, size_t alignment) { + return ptr % alignment == 0; +} + +template <typename T> +static inline bool IsAlignedPtr(T* ptr, size_t alignment) { + return IsAlignedPtr(reinterpret_cast<uintptr_t>(ptr), alignment); +} + +template <typename T> +static inline bool IsPageAlignedPtr(T ptr) { + return IsAlignedPtr(ptr, PageSize()); +} + +static inline bool IsAlignedSize(size_t size, size_t alignment) { + return size % alignment == 0; +} + +static inline bool IsPageAlignedSize(size_t size) { + return IsAlignedSize(size, PageSize()); +} + +static inline size_t PageNumber(size_t size) { + return (size + PageSize() - 1) / PageSize(); +} + +/** + * MemoryRange stores a pointer, size pair. 
+ */ +class MemoryRange { + public: + /* Records an existing (pointer, length) pair; nothing is mapped here. */ MemoryRange(void* buf, size_t length) : buf(buf), length(length) {} + + /* Overwrites the stored pointer and length. */ void Assign(void* b, size_t len) { + buf = b; + length = len; + } + + /* Copies the pointer and length held by another range. */ void Assign(const MemoryRange& other) { + buf = other.buf; + length = other.length; + } + + /* Returns the stored base pointer. */ void* get() const { return buf; } + + /* Implicit conversions to the stored base pointer. */ operator void*() const { return buf; } + + operator unsigned char*() const { + return reinterpret_cast<unsigned char*>(buf); + } + + /* Compare the stored base pointer with a raw pointer, e.g. MAP_FAILED. */ bool operator==(void* ptr) const { return buf == ptr; } + + bool operator==(unsigned char* ptr) const { return buf == ptr; } + + /* Returns the address offset bytes past the base pointer. */ void* operator+(off_t offset) const { + return reinterpret_cast<char*>(buf) + offset; + } + + /** + * Returns whether the given address is within the mapped range, + * i.e. in [buf, buf + length). + */ + bool Contains(void* ptr) const { + return (ptr >= buf) && (ptr < reinterpret_cast<char*>(buf) + length); + } + + /** + * Returns the length of the mapped range + */ + size_t GetLength() const { return length; } + + /* Calls ::mmap and pairs its result with length. The result may hold MAP_FAILED; no error check is done here. */ static MemoryRange mmap(void* addr, size_t length, int prot, int flags, + int fd, off_t offset) { + return MemoryRange(::mmap(addr, length, prot, flags, fd, offset), length); + } + + private: + void* buf; + size_t length; +}; + +/** + * MappedPtr is a RAII wrapper for mmap()ed memory. It can be used as + * a simple void * or unsigned char *. + * + * It is defined as a derivative of a template that allows using a + * different unmapping strategy. 
+ */ +template <typename T> +class GenericMappedPtr : public MemoryRange { + public: + GenericMappedPtr(void* buf, size_t length) : MemoryRange(buf, length) {} + explicit GenericMappedPtr(const MemoryRange& other) : MemoryRange(other) {} + GenericMappedPtr() : MemoryRange(MAP_FAILED, 0) {} + + void Assign(void* b, size_t len) { + if (get() != MAP_FAILED) static_cast<T*>(this)->munmap(get(), GetLength()); + MemoryRange::Assign(b, len); + } + + void Assign(const MemoryRange& other) { + Assign(other.get(), other.GetLength()); + } + + ~GenericMappedPtr() { + if (get() != MAP_FAILED) static_cast<T*>(this)->munmap(get(), GetLength()); + } + + void release() { MemoryRange::Assign(MAP_FAILED, 0); } +}; + +struct MappedPtr : public GenericMappedPtr<MappedPtr> { + MappedPtr(void* buf, size_t length) + : GenericMappedPtr<MappedPtr>(buf, length) {} + MOZ_IMPLICIT MappedPtr(const MemoryRange& other) + : GenericMappedPtr<MappedPtr>(other) {} + MappedPtr() : GenericMappedPtr<MappedPtr>() {} + + private: + friend class GenericMappedPtr<MappedPtr>; + void munmap(void* buf, size_t length) { ::munmap(buf, length); } +}; + +/** + * UnsizedArray is a way to access raw arrays of data in memory. + * + * struct S { ... }; + * UnsizedArray<S> a(buf); + * UnsizedArray<S> b; b.Init(buf); + * + * This is roughly equivalent to + * const S *a = reinterpret_cast<const S *>(buf); + * const S *b = nullptr; b = reinterpret_cast<const S *>(buf); + * + * An UnsizedArray has no known length, and it's up to the caller to make + * sure the accessed memory is mapped and makes sense. 
/**
 * UnsizedArray<T> is a read-only view of a raw buffer interpreted as an
 * array of T. It carries no length: indexing is only guarded by a non-null
 * assertion.
 */
template <typename T>
class UnsizedArray {
 public:
  using idx_t = size_t;

  /**
   * Constructors and Initializers
   */
  UnsizedArray() : contents(nullptr) {}
  explicit UnsizedArray(const void* buf)
      : contents(static_cast<const T*>(buf)) {}

  /** Points the array at buf; asserts that it was not pointing anywhere. */
  void Init(const void* buf) {
    MOZ_ASSERT(contents == nullptr);
    contents = static_cast<const T*>(buf);
  }

  /**
   * Returns the element at the given index (no bounds check).
   */
  const T& operator[](const idx_t index) const {
    MOZ_ASSERT(contents);
    return *(contents + index);
  }

  /** Gives access to the underlying pointer. */
  operator const T*() const { return contents; }

  /**
   * Returns whether the array points somewhere
   */
  explicit operator bool() const { return contents != nullptr; }

 private:
  const T* contents;
};

/**
 * Array, like UnsizedArray, is a way to access raw arrays of data in memory.
 * Unlike UnsizedArray, it has a known length, and is enumerable with an
 * iterator.
 *
 *   struct S { ... };
 *   Array<S> a(buf, len);
 *   Array<S> b; b.Init(buf, len);
 *
 * In the above examples, len is the number of elements in the array.
It is + * also possible to initialize an Array with the buffer size: + * + * Array<S> c; c.InitSize(buf, size); + * + * It is also possible to initialize an Array in two steps, only providing + * one data at a time: + * + * Array<S> d; + * d.Init(buf); + * d.Init(len); // or d.InitSize(size); + * + */ +template <typename T> +class Array : public UnsizedArray<T> { + public: + typedef typename UnsizedArray<T>::idx_t idx_t; + + /** + * Constructors and Initializers + */ + Array() : UnsizedArray<T>(), length(0) {} + Array(const void* buf, const idx_t length) + : UnsizedArray<T>(buf), length(length) {} + + void Init(const void* buf) { UnsizedArray<T>::Init(buf); } + + void Init(const idx_t len) { + MOZ_ASSERT(length == 0); + length = len; + } + + void InitSize(const idx_t size) { Init(size / sizeof(T)); } + + void Init(const void* buf, const idx_t len) { + UnsizedArray<T>::Init(buf); + Init(len); + } + + void InitSize(const void* buf, const idx_t size) { + UnsizedArray<T>::Init(buf); + InitSize(size); + } + + /** + * Returns the nth element of the array + */ + const T& operator[](const idx_t index) const { + MOZ_ASSERT(index < length); + MOZ_ASSERT(operator bool()); + return UnsizedArray<T>::operator[](index); + } + + /** + * Returns the number of elements in the array + */ + idx_t numElements() const { return length; } + + /** + * Returns whether the array points somewhere and has at least one element. + */ + explicit operator bool() const { + return (length > 0) && UnsizedArray<T>::operator bool(); + } + + /** + * Iterator for an Array. Use is similar to that of STL const_iterators: + * + * struct S { ... }; + * Array<S> a(buf, len); + * for (Array<S>::iterator it = a.begin(); it < a.end(); ++it) { + * // Do something with *it. 
+ * } + */ + class iterator { + public: + iterator() : item(nullptr) {} + + const T& operator*() const { return *item; } + + const T* operator->() const { return item; } + + iterator& operator++() { + ++item; + return *this; + } + + bool operator<(const iterator& other) const { return item < other.item; } + + protected: + friend class Array<T>; + explicit iterator(const T& item) : item(&item) {} + + private: + const T* item; + }; + + /** + * Returns an iterator pointing at the beginning of the Array + */ + iterator begin() const { + if (length) return iterator(UnsizedArray<T>::operator[](0)); + return iterator(); + } + + /** + * Returns an iterator pointing past the end of the Array + */ + iterator end() const { + if (length) return iterator(UnsizedArray<T>::operator[](length)); + return iterator(); + } + + /** + * Reverse iterator for an Array. Use is similar to that of STL + * const_reverse_iterators: + * + * struct S { ... }; + * Array<S> a(buf, len); + * for (Array<S>::reverse_iterator it = a.rbegin(); it < a.rend(); ++it) { + * // Do something with *it. 
+ * } + */ + class reverse_iterator { + public: + reverse_iterator() : item(nullptr) {} + + const T& operator*() const { + const T* tmp = item; + return *--tmp; + } + + const T* operator->() const { return &operator*(); } + + reverse_iterator& operator++() { + --item; + return *this; + } + + bool operator<(const reverse_iterator& other) const { + return item > other.item; + } + + protected: + friend class Array<T>; + explicit reverse_iterator(const T& item) : item(&item) {} + + private: + const T* item; + }; + + /** + * Returns a reverse iterator pointing at the end of the Array + */ + reverse_iterator rbegin() const { + if (length) return reverse_iterator(UnsizedArray<T>::operator[](length)); + return reverse_iterator(); + } + + /** + * Returns a reverse iterator pointing past the beginning of the Array + */ + reverse_iterator rend() const { + if (length) return reverse_iterator(UnsizedArray<T>::operator[](0)); + return reverse_iterator(); + } + + private: + idx_t length; +}; + +/** + * Transforms a pointer-to-function to a pointer-to-object pointing at the + * same address. + */ +template <typename T> +void* FunctionPtr(T func) { + union { + void* ptr; + T func; + } f; + f.func = func; + return f.ptr; +} + +class AutoLock { + public: + explicit AutoLock(pthread_mutex_t* mutex) : mutex(mutex) { + if (pthread_mutex_lock(mutex)) MOZ_CRASH("pthread_mutex_lock failed"); + } + ~AutoLock() { + if (pthread_mutex_unlock(mutex)) MOZ_CRASH("pthread_mutex_unlock failed"); + } + + private: + pthread_mutex_t* mutex; +}; + +#endif /* Utils_h */ diff --git a/mozglue/linker/XZStream.cpp b/mozglue/linker/XZStream.cpp new file mode 100644 index 0000000000..db154d12aa --- /dev/null +++ b/mozglue/linker/XZStream.cpp @@ -0,0 +1,221 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "XZStream.h" + +#include <algorithm> +#include <cstring> +#include "mozilla/Assertions.h" +#include "mozilla/CheckedInt.h" +#include "Logging.h" + +// LZMA dictionary size, should have a minimum size for the given compression +// rate, see XZ Utils docs for details. +static const uint32_t kDictSize = 1 << 24; + +static const size_t kFooterSize = 12; + +// Parses a variable-length integer (VLI), +// see http://tukaani.org/xz/xz-file-format.txt for details. +static size_t ParseVarLenInt(const uint8_t* aBuf, size_t aBufSize, + uint64_t* aValue) { + if (!aBufSize) { + return 0; + } + aBufSize = std::min(size_t(9), aBufSize); + + *aValue = aBuf[0] & 0x7F; + size_t i = 0; + + while (aBuf[i++] & 0x80) { + if (i >= aBufSize || aBuf[i] == 0x0) { + return 0; + } + *aValue |= static_cast<uint64_t>(aBuf[i] & 0x7F) << (i * 7); + } + return i; +} + +/* static */ +bool XZStream::IsXZ(const void* aBuf, size_t aBufSize) { + static const uint8_t kXzMagic[] = {0xfd, '7', 'z', 'X', 'Z', 0x0}; + MOZ_ASSERT(aBuf); + return aBufSize > sizeof(kXzMagic) && + !memcmp(reinterpret_cast<const void*>(kXzMagic), aBuf, + sizeof(kXzMagic)); +} + +XZStream::XZStream(const void* aInBuf, size_t aInSize) + : mInBuf(static_cast<const uint8_t*>(aInBuf)), + mUncompSize(0), + mDec(nullptr) { + mBuffers.in = mInBuf; + mBuffers.in_pos = 0; + mBuffers.in_size = aInSize; +} + +XZStream::~XZStream() { xz_dec_end(mDec); } + +bool XZStream::Init() { +#ifdef XZ_USE_CRC64 + xz_crc64_init(); +#endif + xz_crc32_init(); + + mDec = xz_dec_init(XZ_DYNALLOC, kDictSize); + + if (!mDec) { + return false; + } + + mUncompSize = ParseUncompressedSize(); + if (!mUncompSize) { + return false; + } + + return true; +} + +size_t XZStream::Decode(void* aOutBuf, size_t aOutSize) { + if (!mDec) { + return 0; + } + + mBuffers.out = static_cast<uint8_t*>(aOutBuf); + mBuffers.out_pos = 0; + mBuffers.out_size = 
aOutSize; + + while (mBuffers.in_pos < mBuffers.in_size && + mBuffers.out_pos < mBuffers.out_size) { + const xz_ret ret = xz_dec_run(mDec, &mBuffers); + + switch (ret) { + case XZ_STREAM_END: + // Stream ended, the next loop iteration should terminate. + MOZ_ASSERT(mBuffers.in_pos == mBuffers.in_size); + [[fallthrough]]; +#ifdef XZ_DEC_ANY_CHECK + case XZ_UNSUPPORTED_CHECK: + // Ignore unsupported check. + [[fallthrough]]; +#endif + case XZ_OK: + // Chunk decoded, proceed. + break; + + case XZ_MEM_ERROR: + ERROR("XZ decoding: memory allocation failed"); + return 0; + + case XZ_MEMLIMIT_ERROR: + ERROR("XZ decoding: memory usage limit reached"); + return 0; + + case XZ_FORMAT_ERROR: + ERROR("XZ decoding: invalid stream format"); + return 0; + + case XZ_OPTIONS_ERROR: + ERROR("XZ decoding: unsupported header options"); + return 0; + + case XZ_DATA_ERROR: + [[fallthrough]]; + case XZ_BUF_ERROR: + ERROR("XZ decoding: corrupt input stream"); + return 0; + + default: + MOZ_ASSERT_UNREACHABLE("XZ decoding: unknown error condition"); + return 0; + } + } + return mBuffers.out_pos; +} + +size_t XZStream::RemainingInput() const { + return mBuffers.in_size - mBuffers.in_pos; +} + +size_t XZStream::Size() const { return mBuffers.in_size; } + +size_t XZStream::UncompressedSize() const { return mUncompSize; } + +size_t XZStream::ParseIndexSize() const { + static const uint8_t kFooterMagic[] = {'Y', 'Z'}; + + const uint8_t* footer = mInBuf + mBuffers.in_size - kFooterSize; + // The magic bytes are at the end of the footer. + if (memcmp(reinterpret_cast<const void*>(kFooterMagic), + footer + kFooterSize - sizeof(kFooterMagic), + sizeof(kFooterMagic))) { + // Not a valid footer at stream end. + ERROR("XZ parsing: Invalid footer at end of stream"); + return 0; + } + // Backward size is a 32 bit LE integer field positioned after the 32 bit + // CRC32 code. It encodes the index size as a multiple of 4 bytes with a + // minimum size of 4 bytes. 
+ const uint32_t backwardSizeRaw = *(footer + 4); + // Check for overflow. + mozilla::CheckedInt<size_t> backwardSizeBytes(backwardSizeRaw); + backwardSizeBytes = (backwardSizeBytes + 1) * 4; + if (!backwardSizeBytes.isValid()) { + ERROR("XZ parsing: Cannot parse index size"); + return 0; + } + return backwardSizeBytes.value(); +} + +size_t XZStream::ParseUncompressedSize() const { + static const uint8_t kIndexIndicator[] = {0x0}; + + const size_t indexSize = ParseIndexSize(); + if (!indexSize) { + return 0; + } + // The footer follows directly the index, so we can use it as a reference. + const uint8_t* end = mInBuf + mBuffers.in_size; + const uint8_t* index = end - kFooterSize - indexSize; + + // The xz stream index consists of three concatenated elements: + // (1) 1 byte indicator (always OxOO) + // (2) a Variable Length Integer (VLI) field for the number of records + // (3) a list of records + // See https://tukaani.org/xz/xz-file-format-1.0.4.txt + // Each record contains a VLI field for unpadded size followed by a var field + // for uncompressed size. We only support xz streams with a single record. + + if (memcmp(reinterpret_cast<const void*>(kIndexIndicator), index, + sizeof(kIndexIndicator))) { + ERROR("XZ parsing: Invalid stream index"); + return 0; + } + + index += sizeof(kIndexIndicator); + uint64_t numRecords = 0; + index += ParseVarLenInt(index, end - index, &numRecords); + // Only streams with a single record are supported. 
+ if (numRecords != 1) { + ERROR("XZ parsing: Multiple records not supported"); + return 0; + } + uint64_t unpaddedSize = 0; + index += ParseVarLenInt(index, end - index, &unpaddedSize); + if (!unpaddedSize) { + ERROR("XZ parsing: Unpadded size is 0"); + return 0; + } + uint64_t uncompressedSize = 0; + index += ParseVarLenInt(index, end - index, &uncompressedSize); + mozilla::CheckedInt<size_t> checkedSize(uncompressedSize); + if (!checkedSize.isValid()) { + ERROR("XZ parsing: Uncompressed stream size is too large"); + return 0; + } + + return checkedSize.value(); +} diff --git a/mozglue/linker/XZStream.h b/mozglue/linker/XZStream.h new file mode 100644 index 0000000000..bab5520e37 --- /dev/null +++ b/mozglue/linker/XZStream.h @@ -0,0 +1,49 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef XZSTREAM_h +#define XZSTREAM_h + +#include <cstdlib> +#include <stdint.h> + +#define XZ_DEC_DYNALLOC +#include "xz.h" + +// Used to decode XZ stream buffers. +class XZStream { + public: + // Returns whether the provided buffer is likely a XZ stream. + static bool IsXZ(const void* aBuf, size_t aBufSize); + + // Creates a XZ stream object for the given input buffer. + XZStream(const void* aInBuf, size_t aInSize); + ~XZStream(); + + // Initializes the decoder and returns whether decoding may commence. + bool Init(); + // Decodes the next chunk of input into the given output buffer. + size_t Decode(void* aOutBuf, size_t aOutSize); + // Returns the number of yet undecoded bytes in the input buffer. + size_t RemainingInput() const; + // Returns the total number of bytes in the input buffer (compressed size). + size_t Size() const; + // Returns the expected final number of bytes in the output buffer. + // Note: will return 0 before successful Init(). 
+ size_t UncompressedSize() const; + + private: + // Parses the stream footer and returns the size of the index in bytes. + size_t ParseIndexSize() const; + // Parses the stream index and returns the expected uncompressed size in + // bytes. + size_t ParseUncompressedSize() const; + + const uint8_t* mInBuf; + size_t mUncompSize; + xz_buf mBuffers; + xz_dec* mDec; +}; + +#endif // XZSTREAM_h diff --git a/mozglue/linker/Zip.cpp b/mozglue/linker/Zip.cpp new file mode 100644 index 0000000000..7ecc6b9a74 --- /dev/null +++ b/mozglue/linker/Zip.cpp @@ -0,0 +1,277 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <sys/mman.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <cstdlib> +#include <algorithm> +#include "Logging.h" +#include "Zip.h" + +already_AddRefed<Zip> Zip::Create(const char* filename) { + /* Open and map the file in memory */ + AutoCloseFD fd(open(filename, O_RDONLY)); + if (fd == -1) { + ERROR("Error opening %s: %s", filename, strerror(errno)); + return nullptr; + } + struct stat st; + if (fstat(fd, &st) == -1) { + ERROR("Error stating %s: %s", filename, strerror(errno)); + return nullptr; + } + size_t size = st.st_size; + if (size <= sizeof(CentralDirectoryEnd)) { + ERROR("Error reading %s: too short", filename); + return nullptr; + } + void* mapped = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd, 0); + if (mapped == MAP_FAILED) { + ERROR("Error mmapping %s: %s", filename, strerror(errno)); + return nullptr; + } + DEBUG_LOG("Mapped %s @%p", filename, mapped); + + return Create(filename, mapped, size); +} + +already_AddRefed<Zip> Zip::Create(const char* filename, void* mapped, + size_t size) { + RefPtr<Zip> zip = new Zip(filename, mapped, size); + + // If neither the first Local File entry nor central directory entries + // have been found, 
the zip was invalid. + if (!zip->nextFile && !zip->entries) { + ERROR("%s - Invalid zip", filename); + return nullptr; + } + + ZipCollection::Singleton.Register(zip); + return zip.forget(); +} + +Zip::Zip(const char* filename, void* mapped, size_t size) + : name(filename ? strdup(filename) : nullptr), + mapped(mapped), + size(size), + nextFile(LocalFile::validate(mapped)) // first Local File entry + , + nextDir(nullptr), + entries(nullptr) { + pthread_mutex_init(&mutex, nullptr); + // If the first local file entry couldn't be found (which can happen + // with optimized jars), check the first central directory entry. + if (!nextFile) GetFirstEntry(); +} + +Zip::~Zip() { + if (name) { + munmap(mapped, size); + DEBUG_LOG("Unmapped %s @%p", name, mapped); + free(name); + } + pthread_mutex_destroy(&mutex); +} + +bool Zip::GetStream(const char* path, Zip::Stream* out) const { + AutoLock lock(&mutex); + + DEBUG_LOG("%s - GetFile %s", name, path); + /* Fast path: if the Local File header on store matches, we can return the + * corresponding stream right away. + * However, the Local File header may not contain enough information, in + * which case the 3rd bit on the generalFlag is set. Unfortunately, this + * bit is also set in some archives even when we do have the data (most + * notably the android packages as built by the Mozilla build system). + * So instead of testing the generalFlag bit, only use the fast path when + * we haven't read the central directory entries yet, and when the + * compressed size as defined in the header is not filled (which is a + * normal condition for the bit to be set). 
*/ + if (nextFile && nextFile->GetName().Equals(path) && !entries && + (nextFile->compressedSize != 0)) { + DEBUG_LOG("%s - %s was next file: fast path", name, path); + /* Fill Stream info from Local File header content */ + const char* data = reinterpret_cast<const char*>(nextFile->GetData()); + out->compressedBuf = data; + out->compressedSize = nextFile->compressedSize; + out->uncompressedSize = nextFile->uncompressedSize; + out->CRC32 = nextFile->CRC32; + out->type = static_cast<Stream::Type>(uint16_t(nextFile->compression)); + + /* Find the next Local File header. It is usually simply following the + * compressed stream, but in cases where the 3rd bit of the generalFlag + * is set, there is a Data Descriptor header before. */ + data += nextFile->compressedSize; + if ((nextFile->generalFlag & 0x8) && DataDescriptor::validate(data)) { + data += sizeof(DataDescriptor); + } + nextFile = LocalFile::validate(data); + return true; + } + + /* If the directory entry we have in store doesn't match, scan the Central + * Directory for the entry corresponding to the given path */ + if (!nextDir || !nextDir->GetName().Equals(path)) { + const DirectoryEntry* entry = GetFirstEntry(); + DEBUG_LOG("%s - Scan directory entries in search for %s", name, path); + while (entry && !entry->GetName().Equals(path)) { + entry = entry->GetNext(); + } + nextDir = entry; + } + if (!nextDir) { + DEBUG_LOG("%s - Couldn't find %s", name, path); + return false; + } + + /* Find the Local File header corresponding to the Directory entry that + * was found. 
*/ + nextFile = + LocalFile::validate(static_cast<const char*>(mapped) + nextDir->offset); + if (!nextFile) { + ERROR("%s - Couldn't find the Local File header for %s", name, path); + return false; + } + + /* Fill Stream info from Directory entry content */ + const char* data = reinterpret_cast<const char*>(nextFile->GetData()); + out->compressedBuf = data; + out->compressedSize = nextDir->compressedSize; + out->uncompressedSize = nextDir->uncompressedSize; + out->CRC32 = nextDir->CRC32; + out->type = static_cast<Stream::Type>(uint16_t(nextDir->compression)); + + /* Store the next directory entry */ + nextDir = nextDir->GetNext(); + nextFile = nullptr; + return true; +} + +const Zip::DirectoryEntry* Zip::GetFirstEntry() const { + if (entries) return entries; + + const CentralDirectoryEnd* end = nullptr; + const char* _end = + static_cast<const char*>(mapped) + size - sizeof(CentralDirectoryEnd); + + /* Scan for the Central Directory End */ + for (; _end > mapped && !end; _end--) + end = CentralDirectoryEnd::validate(_end); + if (!end) { + ERROR("%s - Couldn't find end of central directory record", name); + return nullptr; + } + + entries = + DirectoryEntry::validate(static_cast<const char*>(mapped) + end->offset); + if (!entries) { + ERROR("%s - Couldn't find central directory record", name); + } + return entries; +} + +bool Zip::VerifyCRCs() const { + AutoLock lock(&mutex); + + for (const DirectoryEntry* entry = GetFirstEntry(); entry; + entry = entry->GetNext()) { + const LocalFile* file = + LocalFile::validate(static_cast<const char*>(mapped) + entry->offset); + uint32_t crc = crc32(0, nullptr, 0); + + DEBUG_LOG("%.*s: crc=%08x", int(entry->filenameSize), + reinterpret_cast<const char*>(entry) + sizeof(*entry), + uint32_t(entry->CRC32)); + + if (entry->compression == Stream::Type::STORE) { + crc = crc32(crc, static_cast<const uint8_t*>(file->GetData()), + entry->compressedSize); + DEBUG_LOG(" STORE size=%d crc=%08x", int(entry->compressedSize), crc); + + } else 
if (entry->compression == Stream::Type::DEFLATE) { + z_stream zstream; + Bytef buffer[1024]; + zstream.avail_in = entry->compressedSize; + zstream.next_in = + reinterpret_cast<Bytef*>(const_cast<void*>(file->GetData())); + zstream.zalloc = nullptr; + zstream.zfree = nullptr; + zstream.opaque = nullptr; + + if (inflateInit2(&zstream, -MAX_WBITS) != Z_OK) { + return false; + } + + for (;;) { + zstream.avail_out = sizeof(buffer); + zstream.next_out = buffer; + + int ret = inflate(&zstream, Z_SYNC_FLUSH); + crc = crc32(crc, buffer, sizeof(buffer) - zstream.avail_out); + + if (ret == Z_STREAM_END) { + break; + } else if (ret != Z_OK) { + return false; + } + } + + inflateEnd(&zstream); + DEBUG_LOG(" DEFLATE size=%d crc=%08x", int(zstream.total_out), crc); + + } else { + MOZ_ASSERT_UNREACHABLE("Unexpected stream type"); + continue; + } + + if (entry->CRC32 != crc) { + return false; + } + } + + return true; +} + +ZipCollection ZipCollection::Singleton; + +static pthread_mutex_t sZipCollectionMutex = PTHREAD_MUTEX_INITIALIZER; + +already_AddRefed<Zip> ZipCollection::GetZip(const char* path) { + { + AutoLock lock(&sZipCollectionMutex); + /* Search the list of Zips we already have for a match */ + for (const auto& zip : Singleton.zips) { + if (zip->GetName() && (strcmp(zip->GetName(), path) == 0)) { + return RefPtr<Zip>(zip).forget(); + } + } + } + return Zip::Create(path); +} + +void ZipCollection::Register(Zip* zip) { + AutoLock lock(&sZipCollectionMutex); + DEBUG_LOG("ZipCollection::Register(\"%s\")", zip->GetName()); + Singleton.zips.push_back(zip); +} + +void ZipCollection::Forget(const Zip* zip) { + AutoLock lock(&sZipCollectionMutex); + if (zip->refCount() > 1) { + // Someone has acquired a reference before we had acquired the lock, + // ignore this request. 
+ return; + } + DEBUG_LOG("ZipCollection::Forget(\"%s\")", zip->GetName()); + const auto it = std::find(Singleton.zips.begin(), Singleton.zips.end(), zip); + if (*it == zip) { + Singleton.zips.erase(it); + } else { + DEBUG_LOG("ZipCollection::Forget: didn't find \"%s\" in bookkeeping", + zip->GetName()); + } +} diff --git a/mozglue/linker/Zip.h b/mozglue/linker/Zip.h new file mode 100644 index 0000000000..3e596c3c4c --- /dev/null +++ b/mozglue/linker/Zip.h @@ -0,0 +1,388 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef Zip_h +#define Zip_h + +#include <cstring> +#include <stdint.h> +#include <vector> +#include <zlib.h> +#include <pthread.h> +#include "Utils.h" +#include "mozilla/Assertions.h" +#include "mozilla/RefCounted.h" +#include "mozilla/RefPtr.h" + +/** + * Forward declaration + */ +class ZipCollection; + +/** + * Class to handle access to Zip archive streams. The Zip archive is mapped + * in memory, and streams are direct references to that mapped memory. + * Zip files are assumed to be correctly formed. No boundary checks are + * performed, which means hand-crafted malicious Zip archives can make the + * code fail in bad ways. However, since the only intended use is to load + * libraries from Zip archives, there is no interest in making this code + * safe, since the libraries could contain malicious code anyways. + */ +class Zip : public mozilla::external::AtomicRefCounted<Zip> { + public: + MOZ_DECLARE_REFCOUNTED_TYPENAME(Zip) + /** + * Create a Zip instance for the given file name. Returns nullptr in case + * of failure. + */ + static already_AddRefed<Zip> Create(const char* filename); + + /** + * Create a Zip instance using the given buffer. 
+ */ + static already_AddRefed<Zip> Create(void* buffer, size_t size) { + return Create(nullptr, buffer, size); + } + + private: + static already_AddRefed<Zip> Create(const char* filename, void* buffer, + size_t size); + + /** + * Private constructor + */ + Zip(const char* filename, void* buffer, size_t size); + + public: + /** + * Destructor + */ + ~Zip(); + + /** + * Class used to access Zip archive item streams + */ + class Stream { + public: + /** + * Stream types + */ + enum Type { STORE = 0, DEFLATE = 8 }; + + /** + * Constructor + */ + Stream() + : compressedBuf(nullptr), + compressedSize(0), + uncompressedSize(0), + CRC32(0), + type(STORE) {} + + /** + * Getters + */ + const void* GetBuffer() { return compressedBuf; } + size_t GetSize() { return compressedSize; } + size_t GetUncompressedSize() { return uncompressedSize; } + size_t GetCRC32() { return CRC32; } + Type GetType() { return type; } + + /** + * Returns a z_stream for use with inflate functions using the given + * buffer as inflate output. The caller is expected to allocate enough + * memory for the Stream uncompressed size. + */ + z_stream GetZStream(void* buf) { + z_stream zStream; + zStream.avail_in = compressedSize; + zStream.next_in = + reinterpret_cast<Bytef*>(const_cast<void*>(compressedBuf)); + zStream.avail_out = uncompressedSize; + zStream.next_out = static_cast<Bytef*>(buf); + zStream.zalloc = nullptr; + zStream.zfree = nullptr; + zStream.opaque = nullptr; + return zStream; + } + + protected: + friend class Zip; + const void* compressedBuf; + size_t compressedSize; + size_t uncompressedSize; + size_t CRC32; + Type type; + }; + + /** + * Returns a stream from the Zip archive. + */ + bool GetStream(const char* path, Stream* out) const; + + /** + * Returns the file name of the archive + */ + const char* GetName() const { return name; } + + /** + * Returns whether all files have correct CRC checksum. 
+ */ + bool VerifyCRCs() const; + + private: + /* File name of the archive */ + char* name; + /* Address where the Zip archive is mapped */ + void* mapped; + /* Size of the archive */ + size_t size; + + /** + * Strings (file names, comments, etc.) in the Zip headers are NOT zero + * terminated. This class is a helper around them. + */ + class StringBuf { + public: + /** + * Constructor + */ + StringBuf(const char* buf, size_t length) : buf(buf), length(length) {} + + /** + * Returns whether the string has the same content as the given zero + * terminated string. + */ + bool Equals(const char* str) const { + return (strncmp(str, buf, length) == 0 && str[length] == '\0'); + } + + private: + const char* buf; + size_t length; + }; + +/* All the following types need to be packed */ +#pragma pack(1) + public: + /** + * A Zip archive is an aggregate of entities which all start with a + * signature giving their type. This template is to be used as a base + * class for these entities. + */ + template <typename T> + class SignedEntity { + public: + /** + * Equivalent to reinterpret_cast<const T *>(buf), with an additional + * check of the signature. + */ + static const T* validate(const void* buf) { + const T* ret = static_cast<const T*>(buf); + if (ret->signature == T::magic) return ret; + return nullptr; + } + + explicit SignedEntity(uint32_t magic) : signature(magic) {} + + private: + le_uint32 signature; + }; + + private: + /** + * Header used to describe a Local File entry. The header is followed by + * the file name and an extra field, then by the data stream. 
+ */ + struct LocalFile : public SignedEntity<LocalFile> { + /* Signature for a Local File header */ + static const uint32_t magic = 0x04034b50; + + /** + * Returns the file name + */ + StringBuf GetName() const { + return StringBuf(reinterpret_cast<const char*>(this) + sizeof(*this), + filenameSize); + } + + /** + * Returns a pointer to the data associated with this header + */ + const void* GetData() const { + return reinterpret_cast<const char*>(this) + sizeof(*this) + + filenameSize + extraFieldSize; + } + + le_uint16 minVersion; + le_uint16 generalFlag; + le_uint16 compression; + le_uint16 lastModifiedTime; + le_uint16 lastModifiedDate; + le_uint32 CRC32; + le_uint32 compressedSize; + le_uint32 uncompressedSize; + le_uint16 filenameSize; + le_uint16 extraFieldSize; + }; + + /** + * In some cases, when a zip archive is created, compressed size and CRC + * are not known when writing the Local File header. In these cases, the + * 3rd bit of the general flag in the Local File header is set, and there + * is an additional header following the compressed data. + */ + struct DataDescriptor : public SignedEntity<DataDescriptor> { + /* Signature for a Data Descriptor header */ + static const uint32_t magic = 0x08074b50; + + le_uint32 CRC32; + le_uint32 compressedSize; + le_uint32 uncompressedSize; + }; + + /** + * Header used to describe a Central Directory Entry. The header is + * followed by the file name, an extra field, and a comment. + */ + struct DirectoryEntry : public SignedEntity<DirectoryEntry> { + /* Signature for a Central Directory Entry header */ + static const uint32_t magic = 0x02014b50; + + /** + * Returns the file name + */ + StringBuf GetName() const { + return StringBuf(reinterpret_cast<const char*>(this) + sizeof(*this), + filenameSize); + } + + /** + * Returns the Central Directory Entry following this one. 
+ */ + const DirectoryEntry* GetNext() const { + return validate(reinterpret_cast<const char*>(this) + sizeof(*this) + + filenameSize + extraFieldSize + fileCommentSize); + } + + le_uint16 creatorVersion; + le_uint16 minVersion; + le_uint16 generalFlag; + le_uint16 compression; + le_uint16 lastModifiedTime; + le_uint16 lastModifiedDate; + le_uint32 CRC32; + le_uint32 compressedSize; + le_uint32 uncompressedSize; + le_uint16 filenameSize; + le_uint16 extraFieldSize; + le_uint16 fileCommentSize; + le_uint16 diskNum; + le_uint16 internalAttributes; + le_uint32 externalAttributes; + le_uint32 offset; + }; + + /** + * Header used to describe the End of Central Directory Record. + */ + struct CentralDirectoryEnd : public SignedEntity<CentralDirectoryEnd> { + /* Signature for the End of Central Directory Record */ + static const uint32_t magic = 0x06054b50; + + le_uint16 diskNum; + le_uint16 startDisk; + le_uint16 recordsOnDisk; + le_uint16 records; + le_uint32 size; + le_uint32 offset; + le_uint16 commentSize; + }; +#pragma pack() + + /** + * Returns the first Directory entry + */ + const DirectoryEntry* GetFirstEntry() const; + + /* Pointer to the Local File Entry following the last one GetStream() used. + * This is used by GetStream to avoid scanning the Directory Entries when the + * requested entry is that one. */ + mutable const LocalFile* nextFile; + + /* Likewise for the next Directory entry */ + mutable const DirectoryEntry* nextDir; + + /* Pointer to the Directory entries */ + mutable const DirectoryEntry* entries; + + mutable pthread_mutex_t mutex; +}; + +/** + * Class for bookkeeping Zip instances + */ +class ZipCollection { + public: + static ZipCollection Singleton; + + /** + * Get a Zip instance for the given path. If there is an existing one + * already, return that one, otherwise create a new one. 
+   */
+  static already_AddRefed<Zip> GetZip(const char* path);
+
+ protected:
+  friend class Zip;
+  friend class mozilla::detail::RefCounted<Zip,
+                                           mozilla::detail::AtomicRefCount>;
+
+  /**
+   * Register the given Zip instance. This method is meant to be called
+   * by Zip::Create.
+   */
+  static void Register(Zip* zip);
+
+  /**
+   * Forget about the given Zip instance. This method is meant to be called
+   * by the Zip destructor.
+   */
+  static void Forget(const Zip* zip);
+
+ private:
+  /* Zip instances bookkept in this collection */
+  std::vector<RefPtr<Zip>> zips;
+};
+
+namespace mozilla {
+namespace detail {
+
+template <>
+inline void RefCounted<Zip, AtomicRefCount>::Release() const {
+  MOZ_ASSERT(static_cast<int32_t>(mRefCnt) > 0);
+  const auto count = --mRefCnt;
+  if (count == 1) {
+    // No external references are left, attempt to remove it from the
+    // collection. If it's successfully removed from the collection, Release()
+    // will be called with mRefCnt = 1, which will finally delete this zip.
+    ZipCollection::Forget(static_cast<const Zip*>(this));
+  } else if (count == 0) {
+#ifdef DEBUG
+    mRefCnt = detail::DEAD;
+#endif
+    delete static_cast<const Zip*>(this);
+  }
+}
+
+#ifdef DEBUG
+template <>
+inline RefCounted<Zip, AtomicRefCount>::~RefCounted() {
+  MOZ_ASSERT(mRefCnt == detail::DEAD);
+}
+#endif
+
+}  // namespace detail
+}  // namespace mozilla
+
+#endif /* Zip_h */
diff --git a/mozglue/linker/moz.build b/mozglue/linker/moz.build
new file mode 100644
index 0000000000..2eb2be9e35
--- /dev/null
+++ b/mozglue/linker/moz.build
@@ -0,0 +1,33 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+if CONFIG["MOZ_LINKER"]:
+    SOURCES += [
+        "BaseElf.cpp",
+        "CustomElf.cpp",
+        "ElfLoader.cpp",
+        "Mappable.cpp",
+        "XZStream.cpp",
+    ]
+
+# When the linker is disabled, we still need Zip for mozglue/android.
+# Logging is a required dependency.
+SOURCES += [
+    "Logging.cpp",
+    "Zip.cpp",
+]
+
+Library("linker")
+
+FINAL_LIBRARY = "mozglue"
+
+TEST_DIRS += ["tests"]
+
+DEFINES["XZ_USE_CRC64"] = 1
+
+USE_LIBS += [
+    "xz-embedded",
+]
diff --git a/mozglue/linker/tests/TestZip.cpp b/mozglue/linker/tests/TestZip.cpp
new file mode 100644
index 0000000000..a2d2b10bdd
--- /dev/null
+++ b/mozglue/linker/tests/TestZip.cpp
@@ -0,0 +1,61 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <cstdio>
+#include <unistd.h>
+#include "Zip.h"
+#include "mozilla/RefPtr.h"
+
+#include "gtest/gtest.h"
+
+Logging Logging::Singleton;
+
+/**
+ * test.zip is a basic test zip file with a central directory. It contains
+ * four entries, in the following order:
+ * "foo", "bar", "baz", "qux".
+ * The entries are going to be read out of order.
+ */
+extern const unsigned char TEST_ZIP[];
+extern const unsigned int TEST_ZIP_SIZE;
+const char* test_entries[] = {"baz", "foo", "bar", "qux"};
+
+/**
+ * no_central_dir.zip is a hand crafted test zip with no central directory
+ * entries. The Zip reader is expected to be able to traverse these entries
+ * if requested in order, without reading a central directory
+ * - First entry is a file "a", STOREd.
+ * - Second entry is a file "b", STOREd, using a data descriptor. CRC is
+ *   unknown, but compressed and uncompressed sizes are known in the local
+ *   file header.
+ * - Third entry is a file "c", DEFLATEd, using a data descriptor. CRC,
+ *   compressed and uncompressed sizes are known in the local file header.
+ *   This is the kind of entry that can be found in a zip that went through
+ *   zipalign if it had a data descriptor originally.
+ * - Fourth entry is a file "d", STOREd.
+ */
+extern const unsigned char NO_CENTRAL_DIR_ZIP[];
+extern const unsigned int NO_CENTRAL_DIR_ZIP_SIZE;
+const char* no_central_dir_entries[] = {"a", "b", "c", "d"};
+
+TEST(Zip, TestZip)
+{
+  Zip::Stream s;
+  RefPtr<Zip> z = Zip::Create((void*)TEST_ZIP, TEST_ZIP_SIZE);
+  for (auto& entry : test_entries) {
+    ASSERT_TRUE(z->GetStream(entry, &s))
+        << "Could not get entry \"" << entry << "\"";
+  }
+}
+
+TEST(Zip, NoCentralDir)
+{
+  Zip::Stream s;
+  RefPtr<Zip> z =
+      Zip::Create((void*)NO_CENTRAL_DIR_ZIP, NO_CENTRAL_DIR_ZIP_SIZE);
+  for (auto& entry : no_central_dir_entries) {
+    ASSERT_TRUE(z->GetStream(entry, &s))
+        << "Could not get entry \"" << entry << "\"";
+  }
+}
diff --git a/mozglue/linker/tests/TestZipData.S b/mozglue/linker/tests/TestZipData.S
new file mode 100644
index 0000000000..5fbb825451
--- /dev/null
+++ b/mozglue/linker/tests/TestZipData.S
@@ -0,0 +1,17 @@
+.macro zip_data name, path
+  .global \name
+  .data
+  .balign 16
+  \name:
+    .incbin "\path"
+  .L\name\()_END:
+    .size \name, .L\name\()_END-\name
+  .global \name\()_SIZE
+  .data
+  .balign 4
+  \name\()_SIZE:
+    .int .L\name\()_END-\name
+.endm
+
+zip_data TEST_ZIP, "test.zip"
+zip_data NO_CENTRAL_DIR_ZIP, "no_central_dir.zip"
diff --git a/mozglue/linker/tests/moz.build b/mozglue/linker/tests/moz.build
new file mode 100644
index 0000000000..4ecc93b190
--- /dev/null
+++ b/mozglue/linker/tests/moz.build
@@ -0,0 +1,20 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+FINAL_LIBRARY = "xul-gtest"
+
+UNIFIED_SOURCES += [
+    "../Zip.cpp",
+    "TestZip.cpp",
+]
+
+SOURCES += [
+    "TestZipData.S",
+]
+
+LOCAL_INCLUDES += [".."]
+
+ASFLAGS += ["-I", SRCDIR]
diff --git a/mozglue/linker/tests/no_central_dir.zip b/mozglue/linker/tests/no_central_dir.zip
Binary files differ
new file mode 100644
index 0000000000..df882220d1
--- /dev/null
+++ b/mozglue/linker/tests/no_central_dir.zip
diff --git a/mozglue/linker/tests/test.zip b/mozglue/linker/tests/test.zip
Binary files differ
new file mode 100644
index 0000000000..657835b0ca
--- /dev/null
+++ b/mozglue/linker/tests/test.zip
|