diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/goblin/src/mach | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/goblin/src/mach')
-rw-r--r-- | third_party/rust/goblin/src/mach/bind_opcodes.rs | 58 | ||||
-rw-r--r-- | third_party/rust/goblin/src/mach/constants.rs | 448 | ||||
-rw-r--r-- | third_party/rust/goblin/src/mach/exports.rs | 279 | ||||
-rw-r--r-- | third_party/rust/goblin/src/mach/fat.rs | 126 | ||||
-rw-r--r-- | third_party/rust/goblin/src/mach/header.rs | 439 | ||||
-rw-r--r-- | third_party/rust/goblin/src/mach/imports.rs | 277 | ||||
-rw-r--r-- | third_party/rust/goblin/src/mach/load_command.rs | 1513 | ||||
-rw-r--r-- | third_party/rust/goblin/src/mach/mod.rs | 413 | ||||
-rw-r--r-- | third_party/rust/goblin/src/mach/relocation.rs | 228 | ||||
-rw-r--r-- | third_party/rust/goblin/src/mach/segment.rs | 518 | ||||
-rw-r--r-- | third_party/rust/goblin/src/mach/symbols.rs | 469 |
11 files changed, 4768 insertions, 0 deletions
diff --git a/third_party/rust/goblin/src/mach/bind_opcodes.rs b/third_party/rust/goblin/src/mach/bind_opcodes.rs new file mode 100644 index 0000000000..b26e367eb2 --- /dev/null +++ b/third_party/rust/goblin/src/mach/bind_opcodes.rs @@ -0,0 +1,58 @@ +//! Bind opcodes are interpreted by the dynamic linker to efficiently collect every symbol imported by this binary, and from which library using two-level namespacing +//! +//! Some uses of external symbols do not need to be bound immediately. +//! Instead they can be lazily bound on first use. The lazy_bind +//! area contains a stream of BIND opcodes to bind all lazy symbols. +//! Normal use is that dyld ignores the lazy_bind section when +//! loading an image. Instead the static linker arranged for a +//! lazy pointer to initially point to a helper function which +//! pushes the offset into the lazy_bind area for the symbol +//! needing to be bound, then jumps to dyld which simply adds +//! the offset to lazy_bind_off to get the information on what +//! to bind. 
+ +pub type Opcode = u8; + +// The following are used to encode binding information +pub const BIND_TYPE_POINTER : u8 = 1; +pub const BIND_TYPE_TEXT_ABSOLUTE32 : u8 = 2; +pub const BIND_TYPE_TEXT_PCREL32 : u8 = 3; +pub const BIND_SPECIAL_DYLIB_SELF : u8 = 0; +pub const BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE : u8 = 0xf; // -1 +pub const BIND_SPECIAL_DYLIB_FLAT_LOOKUP : u8 = 0xe; // -2 +pub const BIND_SYMBOL_FLAGS_WEAK_IMPORT : u8 = 0x1; +pub const BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION : u8 = 0x8; +pub const BIND_OPCODE_MASK : u8 = 0xF0; +pub const BIND_IMMEDIATE_MASK : u8 = 0x0F; +pub const BIND_OPCODE_DONE : Opcode = 0x00; +pub const BIND_OPCODE_SET_DYLIB_ORDINAL_IMM : Opcode = 0x10; +pub const BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB : Opcode = 0x20; +pub const BIND_OPCODE_SET_DYLIB_SPECIAL_IMM : Opcode = 0x30; +pub const BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM : Opcode = 0x40; +pub const BIND_OPCODE_SET_TYPE_IMM : Opcode = 0x50; +pub const BIND_OPCODE_SET_ADDEND_SLEB : Opcode = 0x60; +pub const BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB : Opcode = 0x70; +pub const BIND_OPCODE_ADD_ADDR_ULEB : Opcode = 0x80; +pub const BIND_OPCODE_DO_BIND : Opcode = 0x90; +pub const BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB : Opcode = 0xA0; +pub const BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED : Opcode = 0xB0; +pub const BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB : Opcode = 0xC0; + +pub fn opcode_to_str(opcode: Opcode) -> &'static str { + match opcode { + BIND_OPCODE_DONE => "BIND_OPCODE_DONE", + BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM", + BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB", + BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM", + BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM", + BIND_OPCODE_SET_TYPE_IMM => "BIND_OPCODE_SET_TYPE_IMM", + BIND_OPCODE_SET_ADDEND_SLEB => "BIND_OPCODE_SET_ADDEND_SLEB", + BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => 
"BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB", + BIND_OPCODE_ADD_ADDR_ULEB => "BIND_OPCODE_ADD_ADDR_ULEB", + BIND_OPCODE_DO_BIND => "BIND_OPCODE_DO_BIND", + BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB", + BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED", + BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB", + _ => "UNKNOWN OPCODE" + } +} diff --git a/third_party/rust/goblin/src/mach/constants.rs b/third_party/rust/goblin/src/mach/constants.rs new file mode 100644 index 0000000000..b5afc427ec --- /dev/null +++ b/third_party/rust/goblin/src/mach/constants.rs @@ -0,0 +1,448 @@ +//! Miscellaneous constants used inside of and when constructing, Mach-o binaries +// Convienence constants for return values from dyld_get_sdk_version() and friends. +pub const DYLD_MACOSX_VERSION_10_4: u32 = 0x000A_0400; +pub const DYLD_MACOSX_VERSION_10_5: u32 = 0x000A_0500; +pub const DYLD_MACOSX_VERSION_10_6: u32 = 0x000A_0600; +pub const DYLD_MACOSX_VERSION_10_7: u32 = 0x000A_0700; +pub const DYLD_MACOSX_VERSION_10_8: u32 = 0x000A_0800; +pub const DYLD_MACOSX_VERSION_10_9: u32 = 0x000A_0900; +pub const DYLD_MACOSX_VERSION_10_10: u32 = 0x000A_0A00; +pub const DYLD_MACOSX_VERSION_10_11: u32 = 0x000A_0B00; +pub const DYLD_MACOSX_VERSION_10_12: u32 = 0x000A_0C00; +pub const DYLD_MACOSX_VERSION_10_13: u32 = 0x000A_0D00; + +pub const DYLD_IOS_VERSION_2_0: u32 = 0x0002_0000; +pub const DYLD_IOS_VERSION_2_1: u32 = 0x0002_0100; +pub const DYLD_IOS_VERSION_2_2: u32 = 0x0002_0200; +pub const DYLD_IOS_VERSION_3_0: u32 = 0x0003_0000; +pub const DYLD_IOS_VERSION_3_1: u32 = 0x0003_0100; +pub const DYLD_IOS_VERSION_3_2: u32 = 0x0003_0200; +pub const DYLD_IOS_VERSION_4_0: u32 = 0x0004_0000; +pub const DYLD_IOS_VERSION_4_1: u32 = 0x0004_0100; +pub const DYLD_IOS_VERSION_4_2: u32 = 0x0004_0200; +pub const DYLD_IOS_VERSION_4_3: u32 = 0x0004_0300; +pub const DYLD_IOS_VERSION_5_0: u32 = 0x0005_0000; +pub 
const DYLD_IOS_VERSION_5_1: u32 = 0x0005_0100; +pub const DYLD_IOS_VERSION_6_0: u32 = 0x0006_0000; +pub const DYLD_IOS_VERSION_6_1: u32 = 0x0006_0100; +pub const DYLD_IOS_VERSION_7_0: u32 = 0x0007_0000; +pub const DYLD_IOS_VERSION_7_1: u32 = 0x0007_0100; +pub const DYLD_IOS_VERSION_8_0: u32 = 0x0008_0000; +pub const DYLD_IOS_VERSION_9_0: u32 = 0x0009_0000; +pub const DYLD_IOS_VERSION_10_0: u32 = 0x000A_0000; +pub const DYLD_IOS_VERSION_11_0: u32 = 0x000B_0000; + +// Segment and Section Constants + +// The flags field of a section structure is separated into two parts a section +// type and section attributes. The section types are mutually exclusive (it +// can only have one type) but the section attributes are not (it may have more +// than one attribute). +/// 256 section types +pub const SECTION_TYPE: u32 = 0x0000_00ff; +/// 24 section attributes +pub const SECTION_ATTRIBUTES: u32 = 0xffff_ff00; + +// Constants for the type of a section +/// regular section +pub const S_REGULAR: u32 = 0x0; +/// zero fill on demand section +pub const S_ZEROFILL: u32 = 0x1; +/// section with only literal C strings +pub const S_CSTRING_LITERALS: u32 = 0x2; +/// section with only 4 byte literals +pub const S_4BYTE_LITERALS: u32 = 0x3; +/// section with only 8 byte literals +pub const S_8BYTE_LITERALS: u32 = 0x4; +/// section with only pointers to +pub const S_LITERAL_POINTERS: u32 = 0x5; + +// literals +// For the two types of symbol pointers sections and the symbol stubs section +// they have indirect symbol table entries. For each of the entries in the +// section the indirect symbol table entries, in corresponding order in the +// indirect symbol table, start at the index stored in the reserved1 field +// of the section structure. Since the indirect symbol table entries +// correspond to the entries in the section the number of indirect symbol table +// entries is inferred from the size of the section divided by the size of the +// entries in the section. 
For symbol pointers sections the size of the entries +// in the section is 4 bytes and for symbol stubs sections the byte size of the +// stubs is stored in the reserved2 field of the section structure. +/// section with only non-lazy symbol pointers +pub const S_NON_LAZY_SYMBOL_POINTERS: u32 = 0x6; +/// section with only lazy symbol pointers +pub const S_LAZY_SYMBOL_POINTERS: u32 = 0x7; +/// section with only symbol stubs, byte size of stub in the reserved2 field +pub const S_SYMBOL_STUBS: u32 = 0x8; +/// section with only function pointers for initialization +pub const S_MOD_INIT_FUNC_POINTERS: u32 = 0x9; +/// section with only function pointers for termination +pub const S_MOD_TERM_FUNC_POINTERS: u32 = 0xa; +/// section contains symbols that are to be coalesced +pub const S_COALESCED: u32 = 0xb; +/// zero fill on demand section that can be larger than 4 gigabytes) +pub const S_GB_ZEROFILL: u32 = 0xc; +/// section with only pairs of function pointers for interposing +pub const S_INTERPOSING: u32 = 0xd; +/// section with only 16 byte literals +pub const S_16BYTE_LITERALS: u32 = 0xe; +/// section contains DTrace Object Format +pub const S_DTRACE_DOF: u32 = 0xf; +/// section with only lazy symbol pointers to lazy loaded dylibs +pub const S_LAZY_DYLIB_SYMBOL_POINTERS: u32 = 0x10; + +// Section types to support thread local variables +/// template of initial values for TLVs +pub const S_THREAD_LOCAL_REGULAR: u32 = 0x11; +/// template of initial values for TLVs +pub const S_THREAD_LOCAL_ZEROFILL: u32 = 0x12; +/// TLV descriptors +pub const S_THREAD_LOCAL_VARIABLES: u32 = 0x13; +/// pointers to TLV descriptors +pub const S_THREAD_LOCAL_VARIABLE_POINTERS: u32 = 0x14; +/// functions to call to initialize TLV values +pub const S_THREAD_LOCAL_INIT_FUNCTION_POINTERS: u32 = 0x15; + +// Constants for the section attributes part of the flags field of a section +// structure. 
+/// User settable attributes +pub const SECTION_ATTRIBUTES_USR: u32 = 0xff00_0000; +/// section contains only true machine instructions +pub const S_ATTR_PURE_INSTRUCTIONS: u32 = 0x8000_0000; +/// section contains coalesced symbols that are not to be in a ranlib table of contents +pub const S_ATTR_NO_TOC: u32 = 0x4000_0000; +/// ok to strip static symbols in this section in files with the MH_DYLDLINK flag +pub const S_ATTR_STRIP_STATIC_SYMS: u32 = 0x2000_0000; +/// no dead stripping +pub const S_ATTR_NO_DEAD_STRIP: u32 = 0x1000_0000; +/// blocks are live if they reference live blocks +pub const S_ATTR_LIVE_SUPPORT: u32 = 0x0800_0000; +/// Used with i386 code stubs written on by dyld +pub const S_ATTR_SELF_MODIFYING_CODE: u32 = 0x0400_0000; + +// If a segment contains any sections marked with S_ATTR_DEBUG then all +// sections in that segment must have this attribute. No section other than +// a section marked with this attribute may reference the contents of this +// section. A section with this attribute may contain no symbols and must have +// a section type S_REGULAR. The static linker will not copy section contents +// from sections with this attribute into its output file. These sections +// generally contain DWARF debugging info. +/// debug section +pub const S_ATTR_DEBUG: u32 = 0x0200_0000; +/// system settable attributes +pub const SECTION_ATTRIBUTES_SYS: u32 = 0x00ff_ff00; +/// section contains some machine instructions +pub const S_ATTR_SOME_INSTRUCTIONS: u32 = 0x0000_0400; +/// section has external relocation entries +pub const S_ATTR_EXT_RELOC: u32 = 0x0000_0200; +/// section has local relocation entries +pub const S_ATTR_LOC_RELOC: u32 = 0x0000_0100; + +// The names of segments and sections in them are mostly meaningless to the +// link-editor. But there are a few things to support traditional UNIX +// executables that require the link-editor and assembler to use some names +// agreed upon by convention.
+// The initial protection of the "__TEXT" segment has write protection turned +// off (not writeable). +// The link-editor will allocate common symbols at the end of the "__common" +// section in the "__DATA" segment. It will create the section and segment +// if needed. + +// The currently known segment names and the section names in those segments +/// the pagezero segment which has no protections and catches NULL references for MH_EXECUTE files +pub const SEG_PAGEZERO: &str = "__PAGEZERO"; +/// the traditional UNIX text segment +pub const SEG_TEXT: &str = "__TEXT"; +/// the real text part of the text section, no headers, and no padding +pub const SECT_TEXT: &str = "__text"; +/// the fvmlib initialization section +pub const SECT_FVMLIB_INIT0: &str = "__fvmlib_init0"; +/// the section following the fvmlib initialization section +pub const SECT_FVMLIB_INIT1: &str = "__fvmlib_init1"; +/// the traditional UNIX data segment +pub const SEG_DATA: &str = "__DATA"; +/// the real initialized data section, no padding, no bss overlap +pub const SECT_DATA: &str = "__data"; +/// the real uninitialized data section, no padding +pub const SECT_BSS: &str = "__bss"; +/// the section common symbols are allocated in by the link editor +pub const SECT_COMMON: &str = "__common"; +/// objective-C runtime segment +pub const SEG_OBJC: &str = "__OBJC"; +/// symbol table +pub const SECT_OBJC_SYMBOLS: &str = "__symbol_table"; +/// module information +pub const SECT_OBJC_MODULES: &str = "__module_info"; +/// string table +pub const SECT_OBJC_STRINGS: &str = "__selector_strs"; +/// string table +pub const SECT_OBJC_REFS: &str = "__selector_refs"; +/// the icon segment +pub const SEG_ICON: &str = "__ICON"; +/// the icon headers +pub const SECT_ICON_HEADER: &str = "__header"; +/// the icons in tiff format +pub const SECT_ICON_TIFF: &str = "__tiff"; +/// the segment containing all structs created and maintained by the link editor.
Created with -seglinkedit option to ld(1) for MH_EXECUTE and FVMLIB file types only +pub const SEG_LINKEDIT: &str = "__LINKEDIT"; +/// the unix stack segment +pub const SEG_UNIXSTACK: &str = "__UNIXSTACK"; +/// the segment for the self (dyld) modifing code stubs that has read, write and execute permissions +pub const SEG_IMPORT: &str = "__IMPORT"; + +/// Segment is readable. +pub const VM_PROT_READ: u32 = 0x1; +/// Segment is writable. +pub const VM_PROT_WRITE: u32 = 0x2; +/// Segment is executable. +pub const VM_PROT_EXECUTE: u32 = 0x4; + +pub mod cputype { + + /// An alias for u32 + pub type CpuType = u32; + /// An alias for u32 + pub type CpuSubType = u32; + + /// the mask for CPU feature flags + pub const CPU_SUBTYPE_MASK: u32 = 0xff00_0000; + /// mask for architecture bits + pub const CPU_ARCH_MASK: CpuType = 0xff00_0000; + /// the mask for 64 bit ABI + pub const CPU_ARCH_ABI64: CpuType = 0x0100_0000; + /// the mask for ILP32 ABI on 64 bit hardware + pub const CPU_ARCH_ABI64_32: CpuType = 0x0200_0000; + + // CPU Types + pub const CPU_TYPE_ANY: CpuType = !0; + pub const CPU_TYPE_VAX: CpuType = 1; + pub const CPU_TYPE_MC680X0: CpuType = 6; + pub const CPU_TYPE_X86: CpuType = 7; + pub const CPU_TYPE_I386: CpuType = CPU_TYPE_X86; + pub const CPU_TYPE_X86_64: CpuType = (CPU_TYPE_X86 | CPU_ARCH_ABI64); + pub const CPU_TYPE_MIPS: CpuType = 8; + pub const CPU_TYPE_MC98000: CpuType = 10; + pub const CPU_TYPE_HPPA: CpuType = 11; + pub const CPU_TYPE_ARM: CpuType = 12; + pub const CPU_TYPE_ARM64: CpuType = (CPU_TYPE_ARM | CPU_ARCH_ABI64); + pub const CPU_TYPE_ARM64_32: CpuType = (CPU_TYPE_ARM | CPU_ARCH_ABI64_32); + pub const CPU_TYPE_MC88000: CpuType = 13; + pub const CPU_TYPE_SPARC: CpuType = 14; + pub const CPU_TYPE_I860: CpuType = 15; + pub const CPU_TYPE_ALPHA: CpuType = 16; + pub const CPU_TYPE_POWERPC: CpuType = 18; + pub const CPU_TYPE_POWERPC64: CpuType = (CPU_TYPE_POWERPC | CPU_ARCH_ABI64); + + // CPU Subtypes + pub const CPU_SUBTYPE_MULTIPLE: CpuSubType = !0; 
+ pub const CPU_SUBTYPE_LITTLE_ENDIAN: CpuSubType = 0; + pub const CPU_SUBTYPE_BIG_ENDIAN: CpuSubType = 1; + pub const CPU_SUBTYPE_VAX_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_VAX780: CpuSubType = 1; + pub const CPU_SUBTYPE_VAX785: CpuSubType = 2; + pub const CPU_SUBTYPE_VAX750: CpuSubType = 3; + pub const CPU_SUBTYPE_VAX730: CpuSubType = 4; + pub const CPU_SUBTYPE_UVAXI: CpuSubType = 5; + pub const CPU_SUBTYPE_UVAXII: CpuSubType = 6; + pub const CPU_SUBTYPE_VAX8200: CpuSubType = 7; + pub const CPU_SUBTYPE_VAX8500: CpuSubType = 8; + pub const CPU_SUBTYPE_VAX8600: CpuSubType = 9; + pub const CPU_SUBTYPE_VAX8650: CpuSubType = 10; + pub const CPU_SUBTYPE_VAX8800: CpuSubType = 11; + pub const CPU_SUBTYPE_UVAXIII: CpuSubType = 12; + pub const CPU_SUBTYPE_MC680X0_ALL: CpuSubType = 1; + pub const CPU_SUBTYPE_MC68030: CpuSubType = 1; /* compat */ + pub const CPU_SUBTYPE_MC68040: CpuSubType = 2; + pub const CPU_SUBTYPE_MC68030_ONLY: CpuSubType = 3; + + macro_rules! CPU_SUBTYPE_INTEL { + ($f:expr, $m:expr) => ({ + ($f) + (($m) << 4) + }) + } + + pub const CPU_SUBTYPE_I386_ALL: CpuSubType = CPU_SUBTYPE_INTEL!(3, 0); + pub const CPU_SUBTYPE_386: CpuSubType = CPU_SUBTYPE_INTEL!(3, 0); + pub const CPU_SUBTYPE_486: CpuSubType = CPU_SUBTYPE_INTEL!(4, 0); + pub const CPU_SUBTYPE_486SX: CpuSubType = CPU_SUBTYPE_INTEL!(4, 8); // 8 << 4 = 128 + pub const CPU_SUBTYPE_586: CpuSubType = CPU_SUBTYPE_INTEL!(5, 0); + pub const CPU_SUBTYPE_PENT: CpuSubType = CPU_SUBTYPE_INTEL!(5, 0); + pub const CPU_SUBTYPE_PENTPRO: CpuSubType = CPU_SUBTYPE_INTEL!(6, 1); + pub const CPU_SUBTYPE_PENTII_M3: CpuSubType = CPU_SUBTYPE_INTEL!(6, 3); + pub const CPU_SUBTYPE_PENTII_M5: CpuSubType = CPU_SUBTYPE_INTEL!(6, 5); + pub const CPU_SUBTYPE_CELERON: CpuSubType = CPU_SUBTYPE_INTEL!(7, 6); + pub const CPU_SUBTYPE_CELERON_MOBILE: CpuSubType = CPU_SUBTYPE_INTEL!(7, 7); + pub const CPU_SUBTYPE_PENTIUM_3: CpuSubType = CPU_SUBTYPE_INTEL!(8, 0); + pub const CPU_SUBTYPE_PENTIUM_3_M: CpuSubType = 
CPU_SUBTYPE_INTEL!(8, 1); + pub const CPU_SUBTYPE_PENTIUM_3_XEON: CpuSubType = CPU_SUBTYPE_INTEL!(8, 2); + pub const CPU_SUBTYPE_PENTIUM_M: CpuSubType = CPU_SUBTYPE_INTEL!(9, 0); + pub const CPU_SUBTYPE_PENTIUM_4: CpuSubType = CPU_SUBTYPE_INTEL!(10, 0); + pub const CPU_SUBTYPE_PENTIUM_4_M: CpuSubType = CPU_SUBTYPE_INTEL!(10, 1); + pub const CPU_SUBTYPE_ITANIUM: CpuSubType = CPU_SUBTYPE_INTEL!(11, 0); + pub const CPU_SUBTYPE_ITANIUM_2: CpuSubType = CPU_SUBTYPE_INTEL!(11, 1); + pub const CPU_SUBTYPE_XEON: CpuSubType = CPU_SUBTYPE_INTEL!(12, 0); + pub const CPU_SUBTYPE_XEON_MP: CpuSubType = CPU_SUBTYPE_INTEL!(12, 1); + pub const CPU_SUBTYPE_INTEL_FAMILY_MAX: CpuSubType = 15; + pub const CPU_SUBTYPE_INTEL_MODEL_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_X86_ALL: CpuSubType = 3; + pub const CPU_SUBTYPE_X86_64_ALL: CpuSubType = 3; + pub const CPU_SUBTYPE_X86_ARCH1: CpuSubType = 4; + pub const CPU_SUBTYPE_X86_64_H: CpuSubType = 8; + pub const CPU_SUBTYPE_MIPS_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_MIPS_R2300: CpuSubType = 1; + pub const CPU_SUBTYPE_MIPS_R2600: CpuSubType = 2; + pub const CPU_SUBTYPE_MIPS_R2800: CpuSubType = 3; + pub const CPU_SUBTYPE_MIPS_R2000A: CpuSubType = 4; + pub const CPU_SUBTYPE_MIPS_R2000: CpuSubType = 5; + pub const CPU_SUBTYPE_MIPS_R3000A: CpuSubType = 6; + pub const CPU_SUBTYPE_MIPS_R3000: CpuSubType = 7; + pub const CPU_SUBTYPE_MC98000_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_MC98601: CpuSubType = 1; + pub const CPU_SUBTYPE_HPPA_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_HPPA_7100: CpuSubType = 0; + pub const CPU_SUBTYPE_HPPA_7100LC: CpuSubType = 1; + pub const CPU_SUBTYPE_MC88000_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_MC88100: CpuSubType = 1; + pub const CPU_SUBTYPE_MC88110: CpuSubType = 2; + pub const CPU_SUBTYPE_SPARC_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_I860_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_I860_860: CpuSubType = 1; + pub const CPU_SUBTYPE_POWERPC_ALL: CpuSubType = 0; + pub const 
CPU_SUBTYPE_POWERPC_601: CpuSubType = 1; + pub const CPU_SUBTYPE_POWERPC_602: CpuSubType = 2; + pub const CPU_SUBTYPE_POWERPC_603: CpuSubType = 3; + pub const CPU_SUBTYPE_POWERPC_603E: CpuSubType = 4; + pub const CPU_SUBTYPE_POWERPC_603EV: CpuSubType = 5; + pub const CPU_SUBTYPE_POWERPC_604: CpuSubType = 6; + pub const CPU_SUBTYPE_POWERPC_604E: CpuSubType = 7; + pub const CPU_SUBTYPE_POWERPC_620: CpuSubType = 8; + pub const CPU_SUBTYPE_POWERPC_750: CpuSubType = 9; + pub const CPU_SUBTYPE_POWERPC_7400: CpuSubType = 10; + pub const CPU_SUBTYPE_POWERPC_7450: CpuSubType = 11; + pub const CPU_SUBTYPE_POWERPC_970: CpuSubType = 100; + pub const CPU_SUBTYPE_ARM_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_ARM_V4T: CpuSubType = 5; + pub const CPU_SUBTYPE_ARM_V6: CpuSubType = 6; + pub const CPU_SUBTYPE_ARM_V5TEJ: CpuSubType = 7; + pub const CPU_SUBTYPE_ARM_XSCALE: CpuSubType = 8; + pub const CPU_SUBTYPE_ARM_V7: CpuSubType = 9; + pub const CPU_SUBTYPE_ARM_V7F: CpuSubType = 10; + pub const CPU_SUBTYPE_ARM_V7S: CpuSubType = 11; + pub const CPU_SUBTYPE_ARM_V7K: CpuSubType = 12; + pub const CPU_SUBTYPE_ARM_V6M: CpuSubType = 14; + pub const CPU_SUBTYPE_ARM_V7M: CpuSubType = 15; + pub const CPU_SUBTYPE_ARM_V7EM: CpuSubType = 16; + pub const CPU_SUBTYPE_ARM_V8: CpuSubType = 13; + pub const CPU_SUBTYPE_ARM64_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_ARM64_V8: CpuSubType = 1; + pub const CPU_SUBTYPE_ARM64_E: CpuSubType = 2; + pub const CPU_SUBTYPE_ARM64_32_ALL: CpuSubType = 0; + pub const CPU_SUBTYPE_ARM64_32_V8: CpuSubType = 1; + + macro_rules! 
cpu_flag_mapping { + ( + $(($name:expr, $cputype:ident, $cpusubtype:ident),)* + ) => { + fn get_arch_from_flag_no_alias(name: &str) -> Option<(CpuType, CpuSubType)> { + match name { + $($name => Some(($cputype, $cpusubtype)),)* + _ => None + } + } + + /// Get the architecture name from cputype and cpusubtype + pub fn get_arch_name_from_types(cputype: CpuType, cpusubtype: CpuSubType) + -> Option<&'static str> { + match (cputype, cpusubtype) { + $(($cputype, $cpusubtype) => Some($name),)* + (_, _) => None + } + } + } + } + + /// Get the cputype and cpusubtype from a name + pub fn get_arch_from_flag(name: &str) -> Option<(CpuType, CpuSubType)> { + get_arch_from_flag_no_alias(name).or_else(|| { + // we also handle some common aliases + match name { + // these are used by apple + "pentium" => Some((CPU_TYPE_I386, CPU_SUBTYPE_PENT)), + "pentpro" => Some((CPU_TYPE_I386, CPU_SUBTYPE_PENTPRO)), + // these are used commonly for consistency + "x86" => Some((CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL)), + _ => None, + } + }) + } + + cpu_flag_mapping! 
{ + // generic types + ("any", CPU_TYPE_ANY, CPU_SUBTYPE_MULTIPLE), + ("little", CPU_TYPE_ANY, CPU_SUBTYPE_LITTLE_ENDIAN), + ("big", CPU_TYPE_ANY, CPU_SUBTYPE_BIG_ENDIAN), + + // macho names + ("ppc64", CPU_TYPE_POWERPC64, CPU_SUBTYPE_POWERPC_ALL), + ("x86_64", CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL), + ("x86_64h", CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_H), + ("arm64", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL), + ("arm64_32", CPU_TYPE_ARM64_32, CPU_SUBTYPE_ARM64_32_ALL), + ("ppc970-64", CPU_TYPE_POWERPC64, CPU_SUBTYPE_POWERPC_970), + ("ppc", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_ALL), + ("i386", CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL), + ("m68k", CPU_TYPE_MC680X0, CPU_SUBTYPE_MC680X0_ALL), + ("hppa", CPU_TYPE_HPPA, CPU_SUBTYPE_HPPA_ALL), + ("sparc", CPU_TYPE_SPARC, CPU_SUBTYPE_SPARC_ALL), + ("m88k", CPU_TYPE_MC88000, CPU_SUBTYPE_MC88000_ALL), + ("i860", CPU_TYPE_I860, CPU_SUBTYPE_I860_ALL), + ("arm", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_ALL), + ("ppc601", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_601), + ("ppc603", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_603), + ("ppc603e", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_603E), + ("ppc603ev", CPU_TYPE_POWERPC,CPU_SUBTYPE_POWERPC_603EV), + ("ppc604", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_604), + ("ppc604e",CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_604E), + ("ppc750", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_750), + ("ppc7400", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_7400), + ("ppc7450", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_7450), + ("ppc970", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_970), + ("i486", CPU_TYPE_I386, CPU_SUBTYPE_486), + ("i486SX", CPU_TYPE_I386, CPU_SUBTYPE_486SX), + ("i586", CPU_TYPE_I386, CPU_SUBTYPE_586), + ("i686", CPU_TYPE_I386, CPU_SUBTYPE_PENTPRO), + ("pentIIm3", CPU_TYPE_I386, CPU_SUBTYPE_PENTII_M3), + ("pentIIm5", CPU_TYPE_I386, CPU_SUBTYPE_PENTII_M5), + ("pentium4", CPU_TYPE_I386, CPU_SUBTYPE_PENTIUM_4), + ("m68030", CPU_TYPE_MC680X0, CPU_SUBTYPE_MC68030_ONLY), + ("m68040", CPU_TYPE_MC680X0, CPU_SUBTYPE_MC68040), + ("hppa7100LC", 
CPU_TYPE_HPPA, CPU_SUBTYPE_HPPA_7100LC), + ("armv4t", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V4T), + ("armv5", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V5TEJ), + ("xscale", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_XSCALE), + ("armv6", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6), + ("armv6m", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6M), + ("armv7", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7), + ("armv7f", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7F), + ("armv7s", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S), + ("armv7k", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7K), + ("armv7m", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7M), + ("armv7em", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7EM), + ("arm64v8", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_V8), + ("arm64e", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_E), + ("arm64_32_v8", CPU_TYPE_ARM64_32, CPU_SUBTYPE_ARM64_32_V8), + } +} + +#[cfg(test)] +mod tests { + #[test] + fn test_basic_mapping() { + use super::cputype::*; + + assert_eq!(get_arch_from_flag("armv7"), Some((CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7))); + assert_eq!(get_arch_name_from_types(CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7), Some("armv7")); + assert_eq!(get_arch_from_flag("i386"), Some((CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL))); + assert_eq!(get_arch_from_flag("x86"), Some((CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL))); + } +} diff --git a/third_party/rust/goblin/src/mach/exports.rs b/third_party/rust/goblin/src/mach/exports.rs new file mode 100644 index 0000000000..56b0fd2995 --- /dev/null +++ b/third_party/rust/goblin/src/mach/exports.rs @@ -0,0 +1,279 @@ +//! Symbols exported by this binary and available for dynamic linking are encoded in mach-o binaries using a special trie +//! +//! **Note**: the trie is constructed lazily in case it won't be used, and since computing exports will require allocation, to compute the exports, you need call the export trie's [exports()](struct.ExportTrie.html#method.exports) method. + +// TODO: +// (1) Weak of regular_symbol_info type probably needs to be added ? +// (3) /usr/lib/libstdc++.6.0.9.dylib has flag 0xc at many offsets... 
they're weak + +use core::ops::Range; +use scroll::{Pread, Uleb128}; +use crate::error; +use core::fmt::{self, Debug}; +use crate::mach::load_command; +use crate::alloc::vec::Vec; +use crate::alloc::string::String; + +type Flag = u64; + + // "The following are used on the flags byte of a terminal node + // in the export information." +pub const EXPORT_SYMBOL_FLAGS_KIND_MASK : Flag = 0x03; +pub const EXPORT_SYMBOL_FLAGS_KIND_REGULAR : Flag = 0x00; +pub const EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE : Flag = 0x02; // this is a symbol not present in the loader.h but only in the dyld compressed image loader source code, and only available with a #def macro for export flags but libobjc. def has this +pub const EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL : Flag = 0x01; +pub const EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION : Flag = 0x04; +pub const EXPORT_SYMBOL_FLAGS_REEXPORT : Flag = 0x08; +pub const EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER : Flag = 0x10; + +#[derive(Debug)] +pub enum SymbolKind { + Regular, + Absolute, + ThreadLocal, + UnknownSymbolKind(Flag), +} + +impl SymbolKind { + pub fn new(kind: Flag) -> SymbolKind { + match kind & EXPORT_SYMBOL_FLAGS_KIND_MASK { + 0x00 => SymbolKind::Regular, + 0x01 => SymbolKind::ThreadLocal, + 0x02 => SymbolKind::Absolute, + _ => SymbolKind::UnknownSymbolKind(kind), + } + } + pub fn to_str(&self) -> &'static str { + match self { + SymbolKind::Regular => "Regular", + SymbolKind::Absolute => "Absolute", + SymbolKind::ThreadLocal => "Thread_LOCAL", + SymbolKind::UnknownSymbolKind(_k) => "Unknown", + } + } +} + +#[derive(Debug)] +/// An export can be a regular export, a re-export, or a stub +pub enum ExportInfo<'a> { + /// A regular exported symbol, which is an address where it is found, and the flags associated with it + Regular { + address: u64, + flags: Flag, + }, + /// if lib_symbol_name None then same symbol name, otherwise reexport of lib_symbol_name with name in the trie + /// "If the string is zero length, then the symbol is re-export from the 
specified dylib with the same name" + Reexport { + lib: &'a str, + lib_symbol_name: Option<&'a str>, + flags: Flag, + }, + /// If the flags is `EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER`, then following the flags are two `Uleb128`s: the stub offset and the resolver offset. The stub is used by non-lazy pointers. The resolver is used by lazy pointers and must be called to get the actual address to use + Stub { + stub_offset: scroll::Uleb128, + resolver_offset: scroll::Uleb128, + flags: Flag, + }, +} + +impl<'a> ExportInfo<'a> { + /// Parse out the export info from `bytes`, at `offset` + pub fn parse(bytes: &'a [u8], libs: &[&'a str], flags: Flag, mut offset: usize) -> error::Result<ExportInfo<'a>> { + use self::ExportInfo::*; + let regular = |offset| -> error::Result<ExportInfo> { + let address = bytes.pread::<Uleb128>(offset)?; + Ok(Regular { + address: address.into(), + flags + }) + }; + let reexport = |mut offset| -> error::Result<ExportInfo<'a>> { + let lib_ordinal: u64 = { + let tmp = bytes.pread::<Uleb128>(offset)?; + offset += tmp.size(); + tmp.into() + }; + let lib_symbol_name = bytes.pread::<&str>(offset)?; + let lib = libs[lib_ordinal as usize]; + let lib_symbol_name = if lib_symbol_name == "" { None } else { Some (lib_symbol_name)}; + Ok(Reexport { + lib, + lib_symbol_name, + flags + }) + }; + match SymbolKind::new(flags) { + SymbolKind::Regular => { + if flags & EXPORT_SYMBOL_FLAGS_REEXPORT != 0 { + reexport(offset) + } else if flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER != 0 { // 0x10 + let stub_offset = bytes.pread::<Uleb128>(offset)?; + offset += stub_offset.size(); + let resolver_offset = bytes.pread::<Uleb128>(offset)?; + Ok(Stub { + stub_offset, + resolver_offset, + flags + }) + // else if (flags = kEXPORT_SYMBOL_FLAGS_WEAK_DEFINITION) then (*0x40 unused*) + } else { + regular(offset) + } + }, + SymbolKind::ThreadLocal | SymbolKind::Absolute => { + if flags & EXPORT_SYMBOL_FLAGS_REEXPORT != 0 { + reexport(offset) + } else { + regular(offset) + } + }, 
+ SymbolKind::UnknownSymbolKind(_kind) => { + // 0x5f causes errors, but parsing as regular symbol resolves... + //Err(error::Error::Malformed(format!("Unknown kind {:#x} from flags {:#x} in get_symbol_type at offset {}", kind, flags, offset))) + regular(offset) + } + } + } +} + +#[derive(Debug)] +/// A finalized symbolic export reconstructed from the export trie +pub struct Export<'a> { + /// The reconsituted export name which dyld matches against + pub name: String, + /// The export info in the node data + pub info: ExportInfo<'a>, + /// How large this export is + pub size: usize, + /// The offset this symbol export is found in the binary + pub offset: u64, +} + +impl<'a> Export<'a> { + /// Create a new export from `name` and `info` + pub fn new(name: String, info: ExportInfo<'a>) -> Export<'a> { + let offset = match info { + ExportInfo::Regular { address, .. } => address, + _ => 0x0, + }; + Export { name, info, size: 0, offset } + } +} + +/// An export trie efficiently encodes all of the symbols exported by this binary for dynamic linking +pub struct ExportTrie<'a> { + data: &'a [u8], + location: Range<usize>, +} + +impl<'a> ExportTrie<'a> { + + #[inline] + fn walk_nodes(&self, libs: &[&'a str], branches: Vec<(String, usize)>, acc: &mut Vec<Export<'a>>) -> error::Result<()> { + for (symbol, next_node) in branches { + self.walk_trie(libs, symbol, next_node, acc)?; + } + Ok(()) + } + + // current_symbol can be a str iiuc + fn walk_branches(&self, nbranches: usize, current_symbol: String, mut offset: usize) -> error::Result<Vec<(String, usize)>> { + let mut branches = Vec::with_capacity(nbranches); + //println!("\t@{:#x}", *offset); + for _i in 0..nbranches { + // additional offset calculations are relative to the base we received + let offset = &mut offset; + let string = self.data.pread::<&str>(*offset)?; + let mut key = current_symbol.clone(); + key.push_str(string); + // +1 for null terminator + *offset = *offset + string.len() + 1; + //println!("\t({}) 
string_len: {} offset: {:#x}", i, string.len(), *offset); + // value is relative to export trie base + let next_node = Uleb128::read(&self.data, offset)? as usize + self.location.start; + //println!("\t({}) string: {} next_node: {:#x}", _i, key, next_node); + branches.push((key, next_node)); + } + Ok(branches) + } + + fn walk_trie(&self, libs: &[&'a str], current_symbol: String, start: usize, exports: &mut Vec<Export<'a>>) -> error::Result<()> { + if start < self.location.end { + let mut offset = start; + let terminal_size = Uleb128::read(&self.data, &mut offset)?; + // let mut input = String::new(); + // ::std::io::stdin().read_line(&mut input).unwrap(); + // println!("@ {:#x} node: {:#x} current_symbol: {}", start, terminal_size, current_symbol); + if terminal_size == 0 { + let nbranches = Uleb128::read(&self.data, &mut offset)? as usize; + //println!("\t@ {:#x} BRAN {}", *offset, nbranches); + let branches = self.walk_branches(nbranches, current_symbol, offset)?; + self.walk_nodes(libs, branches, exports) + } else { // terminal node, but the tricky part is that they can have children... + let pos = offset; + let children_start = &mut (pos + terminal_size as usize); + let nchildren = Uleb128::read(&self.data, children_start)? 
as usize; + let flags = Uleb128::read(&self.data, &mut offset)?; + //println!("\t@ {:#x} TERM {} flags: {:#x}", offset, nchildren, flags); + let info = ExportInfo::parse(&self.data, libs, flags, offset)?; + let export = Export::new(current_symbol.clone(), info); + //println!("\t{:?}", &export); + exports.push(export); + if nchildren == 0 { + // this branch is done + Ok(()) + } else { + // more branches to walk + let branches = self.walk_branches(nchildren, current_symbol, *children_start)?; + self.walk_nodes(libs, branches, exports) + } + } + } else { Ok(()) } + } + + /// Walk the export trie for symbols exported by this binary, using the provided `libs` to resolve re-exports + pub fn exports(&self, libs: &[&'a str]) -> error::Result<Vec<Export<'a>>> { + let offset = self.location.start; + let current_symbol = String::new(); + let mut exports = Vec::new(); + self.walk_trie(libs, current_symbol, offset, &mut exports)?; + Ok(exports) + } + + /// Create a new, lazy, zero-copy export trie from the `DyldInfo` `command`. + /// + /// The trie covers `export_off..export_off + export_size`, with the end clamped to `bytes.len()`. + pub fn new(bytes: &'a [u8], command: &load_command::DyldInfoCommand) -> Self { + let start = command.export_off as usize; + // widen to usize before adding so the sum cannot overflow u32, and clamp so a malformed command cannot describe data past the end of `bytes` + let end = start.saturating_add(command.export_size as usize).min(bytes.len()); + ExportTrie { + data: bytes, + location: start..end, + } + } +} + +impl<'a> Debug for ExportTrie<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("ExportTrie") + .field("data", &"<...
redacted ...>") + .field("location", &format_args!("{:#x}..{:#x}", self.location.start, self.location.end)) + .finish() + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn export_trie () { + const EXPORTS: [u8; 64] = [0x00,0x01,0x5f,0x00,0x05,0x00,0x02,0x5f,0x6d,0x68,0x5f,0x65,0x78,0x65,0x63,0x75,0x74,0x65,0x5f,0x68,0x65,0x61,0x64,0x65,0x72,0x00,0x1f,0x6d,0x61,0x00,0x23,0x02,0x00,0x00,0x00,0x00,0x02,0x78,0x69,0x6d,0x75,0x6d,0x00,0x30,0x69,0x6e,0x00,0x35,0x03,0x00,0xc0,0x1e,0x00,0x03,0x00,0xd0,0x1e,0x00,0x00,0x00,0x00,0x00,0x00,0x00]; + let exports = &EXPORTS[..]; + let libs = vec!["/usr/lib/libderp.so", "/usr/lib/libthuglife.so"]; + let mut command = load_command::DyldInfoCommand::default(); + command.export_size = exports.len() as u32; + let trie = ExportTrie::new(&exports, &command); + println!("trie: {:#?}", &trie); + let exports = trie.exports(&libs).unwrap(); + println!("len: {} exports: {:#?}", exports.len(), &exports); + assert_eq!(exports.len() as usize, 3usize) + } +} diff --git a/third_party/rust/goblin/src/mach/fat.rs b/third_party/rust/goblin/src/mach/fat.rs new file mode 100644 index 0000000000..28d795936e --- /dev/null +++ b/third_party/rust/goblin/src/mach/fat.rs @@ -0,0 +1,126 @@ +//! A Mach-o fat binary is a multi-architecture binary container + +use core::fmt; + +if_std! 
{ + use std::fs::File; + use std::io::{self, Read}; +} + +use scroll::{Pread, Pwrite, SizeWith}; +use crate::mach::constants::cputype::{CpuType, CpuSubType, CPU_SUBTYPE_MASK, CPU_ARCH_ABI64}; +use crate::error; + +pub const FAT_MAGIC: u32 = 0xcafe_babe; +pub const FAT_CIGAM: u32 = 0xbeba_feca; + +#[repr(C)] +#[derive(Clone, Copy, Default, Pread, Pwrite, SizeWith)] +/// The Mach-o `FatHeader` always has its data bigendian +pub struct FatHeader { + /// The magic number, `cafebabe` + pub magic: u32, + /// How many fat architecture headers there are + pub nfat_arch: u32, +} + +pub const SIZEOF_FAT_HEADER: usize = 8; + +impl fmt::Debug for FatHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("FatHeader") + .field("magic", &format_args!("0x{:x}", self.magic)) + .field("nfat_arch", &self.nfat_arch) + .finish() + } +} + +impl FatHeader { + /// Reinterpret a `FatHeader` from `bytes` + pub fn from_bytes(bytes: [u8; SIZEOF_FAT_HEADER]) -> FatHeader { + let mut offset = 0; + let magic = bytes.gread_with(&mut offset, scroll::BE).unwrap(); + let nfat_arch = bytes.gread_with(&mut offset, scroll::BE).unwrap(); + FatHeader { + magic, + nfat_arch, + } + } + + /// Reads a `FatHeader` from a `File` on disk + #[cfg(feature = "std")] + pub fn from_fd(fd: &mut File) -> io::Result<FatHeader> { + let mut header = [0; SIZEOF_FAT_HEADER]; + fd.read_exact(&mut header)?; + Ok(FatHeader::from_bytes(header)) + } + + /// Parse a mach-o fat header from the `bytes` + pub fn parse(bytes: &[u8]) -> error::Result<FatHeader> { + Ok(bytes.pread_with::<FatHeader>(0, scroll::BE)?) 
+ } + +} + +#[repr(C)] +#[derive(Clone, Copy, Default, Pread, Pwrite, SizeWith)] +/// The Mach-o `FatArch` always has its data bigendian +pub struct FatArch { + /// What kind of CPU this binary is + pub cputype: u32, + /// The CPU subtype; `cpusubtype()` and `cpu_caps()` split it into the subtype proper and the capability bits + pub cpusubtype: u32, + /// Where in the fat binary it starts + pub offset: u32, + /// How big the binary is + pub size: u32, + pub align: u32, +} + +pub const SIZEOF_FAT_ARCH: usize = 20; + +impl fmt::Debug for FatArch { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("FatArch") + .field("cputype", &self.cputype()) + // label the field after what it holds; it was previously printed as "cmdsize" + .field("cpusubtype", &self.cpusubtype()) + .field("offset", &format_args!("{:#x}", &self.offset)) + .field("size", &self.size) + .field("align", &self.align) + .finish() + } +} + +impl FatArch { + /// Get the slice of bytes this header describes from `bytes`. + /// + /// Returns an empty slice when `offset..offset + size` overflows or does not lie within `bytes`. + pub fn slice<'a>(&self, bytes: &'a [u8]) -> &'a [u8] { + let start = self.offset as usize; + // widen to usize before adding so a hostile `offset`/`size` pair cannot overflow the u32 sum + let end = start.checked_add(self.size as usize); + // the signature cannot report an error, so an invalid range yields an empty slice instead of a panic + end.and_then(|end| bytes.get(start..end)).unwrap_or(&[]) + } + + /// Returns the cpu type + pub fn cputype(&self) -> CpuType { + self.cputype + } + + /// Returns the cpu subtype with the capabilities removed + pub fn cpusubtype(&self) -> CpuSubType { + self.cpusubtype & !CPU_SUBTYPE_MASK + } + + /// Returns the capabilities of the CPU + pub fn cpu_caps(&self) -> u32 { + (self.cpusubtype & CPU_SUBTYPE_MASK) >> 24 + } + + /// Whether this fat architecture header describes a 64-bit binary + pub fn is_64(&self) -> bool { + (self.cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64 + } + + /// Parse a `FatArch` header from `bytes` at `offset` + pub fn parse(bytes: &[u8], offset: usize) -> error::Result<Self> { + let arch = bytes.pread_with::<FatArch>(offset, scroll::BE)?; + Ok(arch) + } +} diff --git a/third_party/rust/goblin/src/mach/header.rs b/third_party/rust/goblin/src/mach/header.rs new file mode 100644 index 0000000000..d19aaccc74 --- /dev/null +++ b/third_party/rust/goblin/src/mach/header.rs @@ -0,0 +1,439 @@ +//!
A header contains minimal architecture information, the binary kind, the number of load commands, as well as an endianness hint + +use core::fmt; +use scroll::ctx; +use scroll::{Pread, Pwrite, SizeWith}; +use scroll::ctx::SizeWith; +use plain::Plain; + +use crate::mach::constants::cputype::{CpuType, CpuSubType, CPU_SUBTYPE_MASK}; +use crate::error; +use crate::container::{self, Container}; + +// Constants for the flags field of the mach_header +/// the object file has no undefined references +pub const MH_NOUNDEFS: u32 = 0x1; +/// the object file is the output of an incremental link against a base file and can't be +/// link edited again +pub const MH_INCRLINK: u32 = 0x2; +/// the object file is input for the dynamic linker and can't be statically link edited again +pub const MH_DYLDLINK: u32 = 0x4; +/// the object file's undefined references are bound by the dynamic linker when loaded. +pub const MH_BINDATLOAD: u32 = 0x8; +/// the file has its dynamic undefined references prebound. +pub const MH_PREBOUND: u32 = 0x10; +/// the file has its read-only and read-write segments split +pub const MH_SPLIT_SEGS: u32 = 0x20; +/// the shared library init routine is to be run lazily via catching memory faults to its writeable +/// segments (obsolete) +pub const MH_LAZY_INIT: u32 = 0x40; +/// the image is using two-level name space bindings +pub const MH_TWOLEVEL: u32 = 0x80; +/// the executable is forcing all images to use flat name space bindings +pub const MH_FORCE_FLAT: u32 = 0x100; +/// this umbrella guarantees no multiple definitions of symbols in its sub-images so the +/// two-level namespace hints can always be used. +pub const MH_NOMULTIDEFS: u32 = 0x200; +/// do not have dyld notify the prebinding agent about this executable +pub const MH_NOFIXPREBINDING: u32 = 0x400; +/// the binary is not prebound but can have its prebinding redone. only used when MH_PREBOUND is not set.
+pub const MH_PREBINDABLE: u32 = 0x800; +/// indicates that this binary binds to all two-level namespace modules of its dependent libraries. +/// Only used when MH_PREBINDABLE and MH_TWOLEVEL are both set. +pub const MH_ALLMODSBOUND: u32 = 0x1000; +/// safe to divide up the sections into sub-sections via symbols for dead code stripping +pub const MH_SUBSECTIONS_VIA_SYMBOLS: u32 = 0x2000; +/// the binary has been canonicalized via the unprebind operation +pub const MH_CANONICAL: u32 = 0x4000; +/// the final linked image contains external weak symbols +pub const MH_WEAK_DEFINES: u32 = 0x8000; +/// the final linked image uses weak symbols +pub const MH_BINDS_TO_WEAK: u32 = 0x10000; +/// When this bit is set, all stacks in the task will be given stack execution privilege. +/// Only used in MH_EXECUTE filetypes. +pub const MH_ALLOW_STACK_EXECUTION: u32 = 0x20000; +/// When this bit is set, the binary declares it is safe for use in processes with uid zero +pub const MH_ROOT_SAFE: u32 = 0x40000; +/// When this bit is set, the binary declares it is safe for use in processes when issetugid() is true +pub const MH_SETUID_SAFE: u32 = 0x80000; +/// When this bit is set on a dylib, the static linker does not need to examine dependent dylibs to +/// see if any are re-exported +pub const MH_NO_REEXPORTED_DYLIBS: u32 = 0x0010_0000; +/// When this bit is set, the OS will load the main executable at a random address. +/// Only used in MH_EXECUTE filetypes. +pub const MH_PIE: u32 = 0x0020_0000; +/// Only for use on dylibs. When linking against a dylib that has this bit set, the static linker +/// will automatically not create a LC_LOAD_DYLIB load command to the dylib if no symbols are being +/// referenced from the dylib. 
+pub const MH_DEAD_STRIPPABLE_DYLIB: u32 = 0x0040_0000; +/// Contains a section of type S_THREAD_LOCAL_VARIABLES +pub const MH_HAS_TLV_DESCRIPTORS: u32 = 0x0080_0000; +/// When this bit is set, the OS will run the main executable with a non-executable heap even on +/// platforms (e.g. i386) that don't require it. Only used in MH_EXECUTE filetypes. +pub const MH_NO_HEAP_EXECUTION: u32 = 0x0100_0000; + +// TODO: verify this number is correct, it was previously 0x02000000 which could indicate a typo/data entry error +/// The code was linked for use in an application extension. +pub const MH_APP_EXTENSION_SAFE: u32 = 0x0200_0000; + +/// Returns the name of the single `MH_*` flag constant that `flag` is equal to. +/// +/// `flag` is compared against each constant as a whole value, so a value with several bits set (or an unrecognised bit) yields `"UNKNOWN FLAG"`. +#[inline(always)] +pub fn flag_to_str(flag: u32) -> &'static str { + match flag { + MH_NOUNDEFS => "MH_NOUNDEFS", + MH_INCRLINK => "MH_INCRLINK", + MH_DYLDLINK => "MH_DYLDLINK", + MH_BINDATLOAD => "MH_BINDATLOAD", + MH_PREBOUND => "MH_PREBOUND", + MH_SPLIT_SEGS => "MH_SPLIT_SEGS", + MH_LAZY_INIT => "MH_LAZY_INIT", + MH_TWOLEVEL => "MH_TWOLEVEL", + MH_FORCE_FLAT => "MH_FORCE_FLAT", + MH_NOMULTIDEFS => "MH_NOMULTIDEFS", + MH_NOFIXPREBINDING => "MH_NOFIXPREBINDING", + MH_PREBINDABLE => "MH_PREBINDABLE", + MH_ALLMODSBOUND => "MH_ALLMODSBOUND", + MH_SUBSECTIONS_VIA_SYMBOLS => "MH_SUBSECTIONS_VIA_SYMBOLS", + MH_CANONICAL => "MH_CANONICAL", + MH_WEAK_DEFINES => "MH_WEAK_DEFINES", + MH_BINDS_TO_WEAK => "MH_BINDS_TO_WEAK", + MH_ALLOW_STACK_EXECUTION => "MH_ALLOW_STACK_EXECUTION", + MH_ROOT_SAFE => "MH_ROOT_SAFE", + MH_SETUID_SAFE => "MH_SETUID_SAFE", + MH_NO_REEXPORTED_DYLIBS => "MH_NO_REEXPORTED_DYLIBS", + MH_PIE => "MH_PIE", + MH_DEAD_STRIPPABLE_DYLIB => "MH_DEAD_STRIPPABLE_DYLIB", + MH_HAS_TLV_DESCRIPTORS => "MH_HAS_TLV_DESCRIPTORS", + MH_NO_HEAP_EXECUTION => "MH_NO_HEAP_EXECUTION", + MH_APP_EXTENSION_SAFE => "MH_APP_EXTENSION_SAFE", + _ => "UNKNOWN FLAG", + } +} + +/// Mach Header magic constant +pub const MH_MAGIC: u32 = 0xfeed_face; +pub const MH_CIGAM: u32 = 0xcefa_edfe; +/// Mach Header magic constant for 64-bit +pub const MH_MAGIC_64: u32
= 0xfeed_facf; +pub const MH_CIGAM_64: u32 = 0xcffa_edfe; + +// Constants for the filetype field of the mach_header +/// relocatable object file +pub const MH_OBJECT: u32 = 0x1; +/// demand paged executable file +pub const MH_EXECUTE: u32 = 0x2; +/// fixed VM shared library file +pub const MH_FVMLIB: u32 = 0x3; +/// core file +pub const MH_CORE: u32 = 0x4; +/// preloaded executable file +pub const MH_PRELOAD: u32 = 0x5; +/// dynamically bound shared library +pub const MH_DYLIB: u32 = 0x6; +/// dynamic link editor +pub const MH_DYLINKER: u32 = 0x7; +/// dynamically bound bundle file +pub const MH_BUNDLE: u32 = 0x8; +/// shared library stub for static linking only, no section contents +pub const MH_DYLIB_STUB: u32 = 0x9; +/// companion file with only debug sections +pub const MH_DSYM: u32 = 0xa; +/// x86_64 kexts +pub const MH_KEXT_BUNDLE: u32 = 0xb; + +pub fn filetype_to_str(filetype: u32) -> &'static str { + match filetype { + MH_OBJECT => "OBJECT", + MH_EXECUTE => "EXECUTE", + MH_FVMLIB => "FVMLIB", + MH_CORE => "CORE", + MH_PRELOAD => "PRELOAD", + MH_DYLIB => "DYLIB", + MH_DYLINKER => "DYLINKER", + MH_BUNDLE => "BUNDLE", + MH_DYLIB_STUB => "DYLIB_STUB", + MH_DSYM => "DSYM", + MH_KEXT_BUNDLE => "KEXT_BUNDLE", + _ => "UNKNOWN FILETYPE", + } +} + +#[repr(C)] +#[derive(Clone, Copy, Default, Debug)] +#[derive(Pread, Pwrite, SizeWith)] +/// A 32-bit Mach-o header +pub struct Header32 { + /// mach magic number identifier + pub magic: u32, + /// cpu specifier + pub cputype: u32, + /// machine specifier + pub cpusubtype: u32, + /// type of file + pub filetype: u32, + /// number of load commands + pub ncmds: u32, + /// the size of all the load commands + pub sizeofcmds: u32, + /// flags + pub flags: u32, +} + +pub const SIZEOF_HEADER_32: usize = 0x1c; + +unsafe impl Plain for Header32 {} + +impl Header32 { + /// Transmutes the given byte array into the corresponding 32-bit Mach-o header + pub fn from_bytes(bytes: &[u8; SIZEOF_HEADER_32]) -> &Self { + 
plain::from_bytes(bytes).unwrap() + } + pub fn size(&self) -> usize { + SIZEOF_HEADER_32 + } +} + +#[repr(C)] +#[derive(Clone, Copy, Default, Debug)] +#[derive(Pread, Pwrite, SizeWith)] +/// A 64-bit Mach-o header +pub struct Header64 { + /// mach magic number identifier + pub magic: u32, + /// cpu specifier + pub cputype: u32, + /// machine specifier + pub cpusubtype: u32, + /// type of file + pub filetype: u32, + /// number of load commands + pub ncmds: u32, + /// the size of all the load commands + pub sizeofcmds: u32, + /// flags + pub flags: u32, + pub reserved: u32, +} + +unsafe impl Plain for Header64 {} + +pub const SIZEOF_HEADER_64: usize = 32; + +impl Header64 { + /// Transmutes the given byte array into the corresponding 64-bit Mach-o header + pub fn from_bytes(bytes: &[u8; SIZEOF_HEADER_64]) -> &Self { + plain::from_bytes(bytes).unwrap() + } + pub fn size(&self) -> usize { + SIZEOF_HEADER_64 + } +} + +#[repr(C)] +#[derive(Clone, Copy, Default)] +/// Generic sized header +pub struct Header { + pub magic: u32, + pub cputype: u32, + pub cpusubtype: u32, + /// type of file + pub filetype: u32, + /// number of load commands + pub ncmds: usize, + /// the size of all the load commands + pub sizeofcmds: u32, + /// flags + pub flags: u32, + pub reserved: u32, +} + +impl fmt::Debug for Header { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Header") + .field("magic", &format_args!("0x{:x}", self.magic)) + .field("cputype", &self.cputype()) + .field("cpusubtype", &format_args!("0x{:x}", self.cpusubtype())) + .field("filetype", &filetype_to_str(self.filetype)) + .field("ncmds", &self.ncmds) + .field("sizeofcmds", &self.sizeofcmds) + .field("flags", &format_args!("0x{:x}", self.flags)) + .field("reserved", &format_args!("0x{:x}", self.reserved)) + .finish() + } +} + +impl From<Header32> for Header { + fn from (header: Header32) -> Self { + Header { + magic: header.magic, + cputype: header.cputype, + cpusubtype: header.cpusubtype, + 
filetype: header.filetype, + ncmds: header.ncmds as usize, + sizeofcmds: header.sizeofcmds, + flags: header.flags, + reserved: 0, + } + } +} + +impl From<Header> for Header32 { + fn from (header: Header) -> Self { + Header32 { + magic: header.magic, + cputype: header.cputype, + cpusubtype: header.cpusubtype, + filetype: header.filetype, + ncmds: header.ncmds as u32, + sizeofcmds: header.sizeofcmds, + flags: header.flags, + } + } +} + +impl From<Header64> for Header { + fn from (header: Header64) -> Self { + Header { + magic: header.magic, + cputype: header.cputype, + cpusubtype: header.cpusubtype, + filetype: header.filetype, + ncmds: header.ncmds as usize, + sizeofcmds: header.sizeofcmds, + flags: header.flags, + reserved: header.reserved, + } + } +} + +impl From<Header> for Header64 { + fn from (header: Header) -> Self { + Header64 { + magic: header.magic, + cputype: header.cputype, + cpusubtype: header.cpusubtype, + filetype: header.filetype, + ncmds: header.ncmds as u32, + sizeofcmds: header.sizeofcmds, + flags: header.flags, + reserved: header.reserved, + } + } +} + +impl Header { + pub fn new(ctx: container::Ctx) -> Self { + let mut header = Header::default(); + header.magic = if ctx.is_big () { MH_MAGIC_64 } else { MH_MAGIC }; + header + } + /// Returns the cpu type + pub fn cputype(&self) -> CpuType { + self.cputype + } + /// Returns the cpu subtype with the capabilities removed + pub fn cpusubtype(&self) -> CpuSubType { + self.cpusubtype & !CPU_SUBTYPE_MASK + } + /// Returns the capabilities of the CPU + pub fn cpu_caps(&self) -> u32 { + (self.cpusubtype & CPU_SUBTYPE_MASK) >> 24 + } +} + +impl ctx::SizeWith<container::Ctx> for Header { + fn size_with(container: &container::Ctx) -> usize { + match container.container { + Container::Little => { + SIZEOF_HEADER_32 + }, + Container::Big => { + SIZEOF_HEADER_64 + }, + } + } +} + +impl ctx::SizeWith<Container> for Header { + fn size_with(container: &Container) -> usize { + match container { + Container::Little 
=> { + SIZEOF_HEADER_32 + }, + Container::Big => { + SIZEOF_HEADER_64 + }, + } + } +} + +/// Reads a `Header32` or a `Header64` from the start of `bytes`, depending on the container in the context, and returns it as a generic `Header` together with the number of bytes that header occupies +impl<'a> ctx::TryFromCtx<'a, container::Ctx> for Header { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], container::Ctx { le, container }: container::Ctx) -> error::Result<(Self, usize)> { + // only require as many bytes as the header for this container occupies; the previous check always demanded the 64-bit size + let required = match container { + Container::Little => SIZEOF_HEADER_32, + Container::Big => SIZEOF_HEADER_64, + }; + if bytes.len() < required { + let error = error::Error::Malformed("bytes size is smaller than a Mach-o header".into()); + Err(error) + } else { + match container { + Container::Little => { + let header = bytes.pread_with::<Header32>(0, le)?; + Ok((Header::from(header), SIZEOF_HEADER_32)) + }, + Container::Big => { + let header = bytes.pread_with::<Header64>(0, le)?; + Ok((Header::from(header), SIZEOF_HEADER_64)) + }, + } + } + } +} + +impl ctx::TryIntoCtx<container::Ctx> for Header { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) -> error::Result<usize> { + match ctx.container { + Container::Little => { + bytes.pwrite_with(Header32::from(self), 0, ctx.le)?; + }, + Container::Big => { + bytes.pwrite_with(Header64::from(self), 0, ctx.le)?; + } + }; + Ok(Header::size_with(&ctx)) + } +} + +impl ctx::IntoCtx<container::Ctx> for Header { + fn into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) { + bytes.pwrite_with(self, 0, ctx).unwrap(); + } +} + +#[cfg(test)] +mod tests { + use std::mem::size_of; + use super::*; + + #[test] + fn test_parse_armv7_header() { + use crate::mach::constants::cputype::CPU_TYPE_ARM; + const CPU_SUBTYPE_ARM_V7: u32 = 9; + use super::Header; + use crate::container::{Ctx, Container, Endian}; + use scroll::{Pread}; + let bytes = b"\xce\xfa\xed\xfe\x0c\x00\x00\x00\t\x00\x00\x00\n\x00\x00\x00\x06\x00\x00\x00\x8c\r\x00\x00\x00\x00\x00\x00\x1b\x00\x00\x00\x18\x00\x00\x00\xe0\xf7B\xbb\x1c\xf50w\xa6\xf7u\xa3\xba("; + let header: Header = bytes.pread_with(0, Ctx::new(Container::Little, Endian::Little)).unwrap(); + assert_eq!(header.cputype,
CPU_TYPE_ARM); + assert_eq!(header.cpusubtype, CPU_SUBTYPE_ARM_V7); + } + + #[test] + fn sizeof_header32() { + assert_eq!(SIZEOF_HEADER_32, size_of::<Header32>()); + } + + #[test] + fn sizeof_header64() { + assert_eq!(SIZEOF_HEADER_64, size_of::<Header64>()); + } +} diff --git a/third_party/rust/goblin/src/mach/imports.rs b/third_party/rust/goblin/src/mach/imports.rs new file mode 100644 index 0000000000..65bbd75842 --- /dev/null +++ b/third_party/rust/goblin/src/mach/imports.rs @@ -0,0 +1,277 @@ +//! Dynamically linked symbolic imports + +// table of tuples: +// <seg-index, seg-offset, type, symbol-library-ordinal, symbol-name, addend> +// symbol flags are undocumented + +use core::ops::Range; +use core::fmt::{self, Debug}; +use scroll::{Sleb128, Uleb128, Pread}; +use crate::alloc::vec::Vec; + +use crate::container; +use crate::error; +use crate::mach::load_command; +use crate::mach::bind_opcodes; +use crate::mach::segment; + +#[derive(Debug)] +/// Import binding information generated by running the Finite State Automaton programmed via `bind_opcodes` +struct BindInformation<'a> { + seg_index: u8, + seg_offset: u64, + bind_type: u8, + symbol_library_ordinal: u8, + symbol_name: &'a str, + symbol_flags: u8, + addend: i64, + special_dylib: u8, // seeing self = 0 assuming this means the symbol is imported from itself, because its... libSystem.B.dylib? 
+ is_lazy: bool, +} + +impl<'a> BindInformation<'a> { + pub fn new (is_lazy: bool) -> Self { + let mut bind_info = BindInformation::default(); + if is_lazy { + bind_info.is_lazy = true; + bind_info.bind_type = bind_opcodes::BIND_TYPE_POINTER; + } + bind_info + } + pub fn is_weak(&self) -> bool { + self.symbol_flags & bind_opcodes::BIND_SYMBOL_FLAGS_WEAK_IMPORT != 0 + } +} + +impl<'a> Default for BindInformation<'a> { + fn default() -> Self { + BindInformation { + seg_index: 0, + seg_offset: 0x0, + bind_type: 0x0, + special_dylib: 1, + symbol_library_ordinal: 0, + symbol_name: "", + symbol_flags: 0, + addend: 0, + is_lazy: false + } + } +} + +#[derive(Debug)] +/// An dynamically linked symbolic import +pub struct Import<'a> { + /// The symbol name dyld uses to resolve this import + pub name: &'a str, + /// The library this symbol belongs to (thanks to two-level namespaces) + pub dylib: &'a str, + /// Whether the symbol is lazily resolved or not + pub is_lazy: bool, + /// The offset in the binary this import is found + pub offset: u64, + /// The size of this import + pub size: usize, + /// The virtual memory address at which this import is found + pub address: u64, + /// The addend of this import + pub addend: i64, + /// Whether this import is weak + pub is_weak: bool, + /// The offset in the stream of bind opcodes that caused this import + pub start_of_sequence_offset: u64 +} + +impl<'a> Import<'a> { + /// Create a new import from the import binding information in `bi` + fn new(bi: &BindInformation<'a>, libs: &[&'a str], segments: &[segment::Segment], start_of_sequence_offset: usize) -> Import<'a> { + let (offset, address) = { + let segment = &segments[bi.seg_index as usize]; + ( + segment.fileoff + bi.seg_offset, + segment.vmaddr + bi.seg_offset + ) + }; + let size = if bi.is_lazy { 8 } else { 0 }; + Import { + name: bi.symbol_name, + dylib: libs[bi.symbol_library_ordinal as usize], + is_lazy: bi.is_lazy, + offset, + size, + address, + addend: bi.addend, + is_weak: 
bi.is_weak(), + start_of_sequence_offset: start_of_sequence_offset as u64 + } + } +} + +/// An interpreter for mach BIND opcodes. +/// Runs on prebound (non lazy) symbols (usually dylib extern consts and extern variables), +/// and lazy symbols (usually dylib functions) +pub struct BindInterpreter<'a> { + data: &'a [u8], + location: Range<usize>, + lazy_location: Range<usize>, +} + +impl<'a> Debug for BindInterpreter<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("BindInterpreter") + .field("data", &"<... redacted ...>") + .field("location", &format_args!("{:#x}..{:#x}", self.location.start, self.location.end)) + .field("lazy_location", &format_args!("{:#x}..{:#x}", self.lazy_location.start, self.lazy_location.end)) + .finish() + } +} + + +impl<'a> BindInterpreter<'a> { + /// Construct a new import binding interpreter from `bytes` and the load `command`. + /// + /// The bind and lazy bind opcode ranges are taken from the command's `bind_off`/`bind_size` and `lazy_bind_off`/`lazy_bind_size` fields. + pub fn new(bytes: &'a [u8], command: &load_command::DyldInfoCommand) -> Self { + let get_pos = |off: u32, size: u32| -> Range<usize> { + // widen to usize before adding: `off + size` computed in u32 can overflow on a malformed load command + let start = off as usize; + start..start.saturating_add(size as usize) + }; + let location = get_pos(command.bind_off, command.bind_size); + let lazy_location = get_pos(command.lazy_bind_off, command.lazy_bind_size); + BindInterpreter { + data: bytes, + location, + lazy_location, + } + } + /// Return the imports in this binary + pub fn imports(&self, libs: &[&'a str], segments: &[segment::Segment], ctx: container::Ctx) -> error::Result<Vec<Import<'a>>>{ + let mut imports = Vec::new(); + self.run(false, libs, segments, ctx, &mut imports)?; + self.run( true, libs, segments, ctx, &mut imports)?; + Ok(imports) + } + fn run(&self, is_lazy: bool, libs: &[&'a str], segments: &[segment::Segment], ctx: container::Ctx, imports: &mut Vec<Import<'a>>) -> error::Result<()>{ + use crate::mach::bind_opcodes::*; + let location = if is_lazy { + &self.lazy_location + } else { + &self.location + }; + let mut bind_info = BindInformation::new(is_lazy); + let mut offset = location.start; + let mut
start_of_sequence: usize = 0; + while offset < location.end { + let opcode = self.data.gread::<i8>(&mut offset)? as bind_opcodes::Opcode; + // let mut input = String::new(); + // ::std::io::stdin().read_line(&mut input).unwrap(); + // println!("opcode: {} ({:#x}) offset: {:#x}\n {:?}", opcode_to_str(opcode & BIND_OPCODE_MASK), opcode, offset - location.start - 1, &bind_info); + match opcode & BIND_OPCODE_MASK { + // we do nothing, don't update our records, and add a new, fresh record + BIND_OPCODE_DONE => { + bind_info = BindInformation::new(is_lazy); + start_of_sequence = offset - location.start; + }, + BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => { + let symbol_library_ordinal = opcode & BIND_IMMEDIATE_MASK; + bind_info.symbol_library_ordinal = symbol_library_ordinal; + }, + BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { + let symbol_library_ordinal = Uleb128::read(&self.data, &mut offset)?; + bind_info.symbol_library_ordinal = symbol_library_ordinal as u8; + }, + BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => { + // dyld puts the immediate into the symbol_library_ordinal field... 
+ let special_dylib = opcode & BIND_IMMEDIATE_MASK; + // Printf.printf "special_dylib: 0x%x\n" special_dylib + bind_info.special_dylib = special_dylib; + }, + BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { + let symbol_flags = opcode & BIND_IMMEDIATE_MASK; + let symbol_name = self.data.pread::<&str>(offset)?; + offset += symbol_name.len() + 1; // second time this \0 caused debug woes + bind_info.symbol_name = symbol_name; + bind_info.symbol_flags = symbol_flags; + }, + BIND_OPCODE_SET_TYPE_IMM => { + let bind_type = opcode & BIND_IMMEDIATE_MASK; + bind_info.bind_type = bind_type; + }, + BIND_OPCODE_SET_ADDEND_SLEB => { + let addend = Sleb128::read(&self.data, &mut offset)?; + bind_info.addend = addend; + }, + BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { + let seg_index = opcode & BIND_IMMEDIATE_MASK; + // dyld sets the address to the segActualLoadAddress(segIndex) + uleb128 + // address = segActualLoadAddress(segmentIndex) + read_uleb128(p, end); + let seg_offset = Uleb128::read(&self.data, &mut offset)?; + bind_info.seg_index = seg_index; + bind_info.seg_offset = seg_offset; + }, + BIND_OPCODE_ADD_ADDR_ULEB => { + let addr = Uleb128::read(&self.data, &mut offset)?; + let seg_offset = bind_info.seg_offset.wrapping_add(addr); + bind_info.seg_offset = seg_offset; + }, + // record the record by placing its value into our list + BIND_OPCODE_DO_BIND => { + // from dyld: + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += sizeof(intptr_t); + imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); + let seg_offset = bind_info.seg_offset.wrapping_add(ctx.size() as u64); + bind_info.seg_offset = seg_offset; + }, + BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => { + // dyld: + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, 
segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += read_uleb128(p, end) + sizeof(intptr_t); + // we bind the old record, then increment bind info address for the next guy, plus the ptr offset *) + imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); + let addr = Uleb128::read(&self.data, &mut offset)?; + let seg_offset = bind_info.seg_offset.wrapping_add(addr).wrapping_add(ctx.size() as u64); + bind_info.seg_offset = seg_offset; + }, + BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => { + // dyld: + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += immediate*sizeof(intptr_t) + sizeof(intptr_t); + // break; + // similarly, we bind the old record, then perform address manipulation for the next record + imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); + let scale = opcode & BIND_IMMEDIATE_MASK; + let size = ctx.size() as u64; + let seg_offset = bind_info.seg_offset.wrapping_add(u64::from(scale) * size).wrapping_add(size); + bind_info.seg_offset = seg_offset; + }, + BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => { + // dyld: + // count = read_uleb128(p, end); + // skip = read_uleb128(p, end); + // for (uint32_t i=0; i < count; ++i) { + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += skip + sizeof(intptr_t); + // } + // break; + let count = Uleb128::read(&self.data, &mut offset)?; + let skip = Uleb128::read(&self.data, &mut offset)?; + // `skip` is read from the file: use wrapping arithmetic like the other DO_BIND arms so a huge value cannot overflow the addition + let skip_plus_size = skip.wrapping_add(ctx.size() as u64); + for _i in 0..count { + imports.push(Import::new(&bind_info,
libs, segments, start_of_sequence)); + let seg_offset = bind_info.seg_offset.wrapping_add(skip_plus_size); + bind_info.seg_offset = seg_offset; + } + }, + _ => { + } + } + } + Ok(()) + } +} diff --git a/third_party/rust/goblin/src/mach/load_command.rs b/third_party/rust/goblin/src/mach/load_command.rs new file mode 100644 index 0000000000..b64853fc2f --- /dev/null +++ b/third_party/rust/goblin/src/mach/load_command.rs @@ -0,0 +1,1513 @@ +//! Load commands tell the kernel and dynamic linker anything from how to load this binary into memory, what the entry point is, apple specific information, to which libraries it requires for dynamic linking + +use crate::error; +use core::fmt::{self, Display}; +use scroll::{ctx, Endian}; +use scroll::{Pread, Pwrite, IOread, IOwrite, SizeWith}; + +/////////////////////////////////////// +// Load Commands from mach-o/loader.h +// with some rusty additions +////////////////////////////////////// + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, SizeWith)] +/// Occurs at the beginning of every load command to serve as a sort of tagged union/enum discriminant +pub struct LoadCommandHeader { + pub cmd: u32, + pub cmdsize: u32, +} + +impl Display for LoadCommandHeader { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "LoadCommandHeader: {} size: {}", cmd_to_str(self.cmd), self.cmdsize) + } +} + +pub const SIZEOF_LOAD_COMMAND: usize = 8; + +pub type LcStr = u32; + +pub const SIZEOF_LC_STR: usize = 4; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Section32 { + /// name of this section + pub sectname: [u8; 16], + /// segment this section goes in + pub segname: [u8; 16], + /// memory address of this section + pub addr: u32, + /// size in bytes of this section + pub size: u32, + /// file offset of this section + pub offset: u32, + /// section alignment (power of 2) + pub align: u32, + /// file offset of relocation entries + pub reloff: u32, + /// number of 
relocation entries + pub nreloc: u32, + /// flags (section type and attributes) + pub flags: u32, + /// reserved (for offset or index) + pub reserved1: u32, + /// reserved (for count or sizeof) + pub reserved2: u32, +} + +pub const SIZEOF_SECTION_32: usize = 68; + +/// for 64-bit architectures +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Section64 { + /// name of this section + pub sectname: [u8; 16], + /// segment this section goes in + pub segname: [u8; 16], + /// memory address of this section + pub addr: u64, + /// size in bytes of this section + pub size: u64, + /// file offset of this section + pub offset: u32, + /// section alignment (power of 2) + pub align: u32, + /// file offset of relocation entries + pub reloff: u32, + /// number of relocation entries + pub nreloc: u32, + /// flags (section type and attributes + pub flags: u32, + /// reserved (for offset or index) + pub reserved1: u32, + /// reserved (for count or sizeof) + pub reserved2: u32, + /// reserved + pub reserved3: u32, +} + +pub const SIZEOF_SECTION_64: usize = 80; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SegmentCommand32 { + pub cmd: u32, + pub cmdsize: u32, + pub segname: [u8; 16], + pub vmaddr: u32, + pub vmsize: u32, + pub fileoff: u32, + pub filesize: u32, + pub maxprot: u32, + pub initprot: u32, + pub nsects: u32, + pub flags: u32, +} + +pub const SIZEOF_SEGMENT_COMMAND_32: usize = 56; + +impl SegmentCommand32 { + pub fn name(&self) -> error::Result<&str> { + Ok(self.segname.pread::<&str>(0)?) 
+ } +} + +/// 64-bit counterpart of `SegmentCommand32`: `vmaddr`, `vmsize`, `fileoff` and `filesize` are `u64` here +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SegmentCommand64 { + pub cmd: u32, + pub cmdsize: u32, + pub segname: [u8; 16], + pub vmaddr: u64, + pub vmsize: u64, + pub fileoff: u64, + pub filesize: u64, + pub maxprot: u32, + pub initprot: u32, + pub nsects: u32, + pub flags: u32, +} + +pub const SIZEOF_SEGMENT_COMMAND_64: usize = 72; + +impl SegmentCommand64 { + /// Reads `segname` as a `&str` with `pread`; a failed read is returned as an error + pub fn name(&self) -> error::Result<&str> { + Ok(self.segname.pread::<&str>(0)?) + } +} +/// Fixed virtual memory shared libraries are identified by two things. The +/// target pathname (the name of the library as found for execution), and the +/// minor version number. The address of where the headers are loaded is in +/// header_addr. (THIS IS OBSOLETE and no longer supported). +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Fvmlib { + /// library's target pathname + pub name: u32, + /// library's minor version number + pub minor_version: u32, + /// library's header address + pub header_addr: u32, +} + +pub const SIZEOF_FVMLIB: usize = 12; + +/// A fixed virtual shared library (filetype == MH_FVMLIB in the mach header) +/// contains a fvmlib_command (cmd == LC_IDFVMLIB) to identify the library. +/// An object that uses a fixed virtual shared library also contains a +/// fvmlib_command (cmd == LC_LOADFVMLIB) for each library it uses. +/// (THIS IS OBSOLETE and no longer supported). +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct FvmlibCommand { + /// LC_IDFVMLIB or LC_LOADFVMLIB + pub cmd: u32, + /// includes pathname string + pub cmdsize: u32, + /// the library identification + pub fvmlib: Fvmlib, +} + +pub const SIZEOF_FVMLIB_COMMAND: usize = 20; + +// /// Dynamically linked shared libraries are identified by two things. The +// /// pathname (the name of the library as found for execution), and the +// /// compatibility version number. 
The pathname must match and the compatibility +// /// number in the user of the library must be greater than or equal to the +// /// library being used. The time stamp is used to record the time a library was +// /// built and copied into user so it can be used to determine if the library used +// /// at runtime is exactly the same as used to build the program. +// struct dylib { +// union lc_str name; // library's path name +// uint32_t timestamp; // library's build time stamp +// uint32_t current_version; // library's current version number +// uint32_t compatibility_version; // library's compatibility version number +// } + +/// A dynamically linked shared library (filetype == MH_DYLIB in the mach header) +/// contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. +/// An object that uses a dynamically linked shared library also contains a +/// dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or +/// LC_REEXPORT_DYLIB) for each library it uses. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Dylib { + /// library's path name + pub name: LcStr, + /// library's build time stamp + pub timestamp: u32, + /// library's current version number + pub current_version: u32, + /// library's compatibility version number + pub compatibility_version: u32, +} + +pub const SIZEOF_DYLIB: usize = 16; + +/// Load command header (`cmd`, `cmdsize`) followed by the `Dylib` record that identifies the library +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DylibCommand { + /// LC_ID_DYLIB, LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB + pub cmd: u32, + /// includes pathname string + pub cmdsize: u32, + /// the library identification + pub dylib: Dylib, + } + +pub const SIZEOF_DYLIB_COMMAND: usize = 20; + +/// A dynamically linked shared library may be a subframework of an umbrella +/// framework. If so it will be linked with "-umbrella umbrella_name" where +/// "umbrella_name" is the name of the umbrella framework. 
A subframework +/// can only be linked against by its umbrella framework or other subframeworks +/// that are part of the same umbrella framework. Otherwise the static link +/// editor produces an error and states to link against the umbrella framework. +/// The name of the umbrella framework for subframeworks is recorded in the +/// following structure. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SubFrameworkCommand { + /// LC_SUB_FRAMEWORK + pub cmd: u32, + /// includes umbrella string + pub cmdsize: u32, + /// the umbrella framework name + pub umbrella: u32, +} + +pub const SIZEOF_SUB_FRAMEWORK_COMMAND: usize = 12; + +/// For dynamically linked shared libraries that are subframework of an umbrella +/// framework they can allow clients other than the umbrella framework or other +/// subframeworks in the same umbrella framework. To do this the subframework +/// is built with "-allowable_client client_name" and an LC_SUB_CLIENT load +/// command is created for each -allowable_client flag. The client_name is +/// usually a framework name. It can also be a name used for bundles clients +/// where the bundle is built with "-client_name client_name". +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SubClientCommand { + /// LC_SUB_CLIENT + pub cmd: u32, + /// includes client string + pub cmdsize: u32, + /// the client name + pub client: LcStr, +} + +pub const SIZEOF_SUB_CLIENT_COMMAND: usize = 12; + +/// A dynamically linked shared library may be a sub_umbrella of an umbrella +/// framework. If so it will be linked with "-sub_umbrella umbrella_name" where +/// Where "umbrella_name" is the name of the sub_umbrella framework. When +/// staticly linking when -twolevel_namespace is in effect a twolevel namespace +/// umbrella framework will only cause its subframeworks and those frameworks +/// listed as sub_umbrella frameworks to be implicited linked in. 
Any other +/// dependent dynamic libraries will not be linked it when -twolevel_namespace +/// is in effect. The primary library recorded by the static linker when +/// resolving a symbol in these libraries will be the umbrella framework. +/// Zero or more sub_umbrella frameworks may be use by an umbrella framework. +/// The name of a sub_umbrella framework is recorded in the following structure. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SubUmbrellaCommand { + /// LC_SUB_UMBRELLA + pub cmd: u32, + /// includes sub_umbrella string + pub cmdsize: u32, + /// the sub_umbrella framework name + pub sub_umbrella: LcStr, +} + +pub const SIZEOF_SUB_UMBRELLA_COMMAND: usize = 12; + +/// A dynamically linked shared library may be a sub_library of another shared +/// library. If so it will be linked with "-sub_library library_name" where +/// Where "library_name" is the name of the sub_library shared library. When +/// staticly linking when -twolevel_namespace is in effect a twolevel namespace +/// shared library will only cause its subframeworks and those frameworks +/// listed as sub_umbrella frameworks and libraries listed as sub_libraries to +/// be implicited linked in. Any other dependent dynamic libraries will not be +/// linked it when -twolevel_namespace is in effect. The primary library +/// recorded by the static linker when resolving a symbol in these libraries +/// will be the umbrella framework (or dynamic library). Zero or more sub_library +/// shared libraries may be use by an umbrella framework or (or dynamic library). +/// The name of a sub_library framework is recorded in the following structure. +/// For example /usr/lib/libobjc_profile.A.dylib would be recorded as "libobjc". 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SubLibraryCommand { + /// LC_SUB_LIBRARY + pub cmd: u32, + /// includes sub_library string + pub cmdsize: u32, + /// the sub_library name + pub sub_library: LcStr, +} + +pub const SIZEOF_SUB_LIBRARY_COMMAND: usize = 12; + +/// A program (type == MH_EXECUTE) that is +/// prebound to its dynamic libraries has one of these for each library that +/// the static linker used in prebinding. It contains a bit vector for the +/// modules in the library. The bits indicate which modules are bound (1) and +/// which are not (0) from the library. The bit for module 0 is the low bit +/// of the first byte. So the bit for the Nth module is: +/// (linked_modules[N/8] >> N%8) & 1 +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct PreboundDylibCommand { + /// LC_PREBOUND_DYLIB + pub cmd: u32, + /// includes strings + pub cmdsize: u32, + /// library's path name + pub name: LcStr, + /// number of modules in library + pub nmodules: u32, + /// bit vector of linked modules + // TODO: fixme + pub linked_modules: LcStr, +} + +pub const SIZEOF_PREBOUND_DYLIB_COMMAND: usize = 20; + +/// The name of the dynamic linker +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DylinkerCommand { + pub cmd: u32, + pub cmdsize: u32, + pub name: LcStr, +} + +pub const SIZEOF_DYLINKER_COMMAND: usize = 12; + +/// Thread commands contain machine-specific data structures suitable for +/// use in the thread state primitives. The machine specific data structures +/// follow the struct thread_command as follows. +/// Each flavor of machine specific data structure is preceded by an unsigned +/// long constant for the flavor of that data structure, an uint32_t +/// that is the count of longs of the size of the state data structure and then +/// the state data structure follows. This triple may be repeated for many +/// flavors. 
The constants for the flavors, counts and state data structure +/// definitions are expected to be in the header file <machine/thread_status.h>. +/// These machine specific data structures sizes must be multiples of +/// 4 bytes. The cmdsize reflects the total size of the thread_command +/// and all of the sizes of the constants for the flavors, counts and state +/// data structures. +/// +/// For executable objects that are unix processes there will be one +/// thread_command (cmd == LC_UNIXTHREAD) created for it by the link-editor. +/// This is the same as a LC_THREAD, except that a stack is automatically +/// created (based on the shell's limit for the stack size). Command line arguments +/// and environment variables are copied onto that stack. +// unimplemented, see machine/thread_status.h for rest of values: +// uint32_t flavor flavor of thread state +// uint32_t count count of longs in thread state +// struct XXX_thread_state state thread state for this flavor +// ... +#[repr(C)] +#[derive(Copy)] +pub struct ThreadCommand { + /// LC_THREAD or LC_UNIXTHREAD + pub cmd: u32, + /// total size of this command + pub cmdsize: u32, + + /// flavor of thread state (but you also need to know the `cputype`) + pub flavor: u32, + + /// number of elements in `thread_state` that are valid + pub count: u32, + + /// The raw thread state, details of which vary by CPU; only the first `count` words are meaningful + pub thread_state: [u32; 70], +} + +impl ThreadCommand { + /// Returns the instruction pointer stored in `thread_state`, picking the register layout from `cputype`. + /// + /// 64-bit registers are assembled from two consecutive `u32` words (low word first). + /// + /// # Errors + /// Returns `Error::Malformed` when `cputype` has no known thread-state layout. + pub fn instruction_pointer(&self, cputype: super::cputype::CpuType) -> error::Result<u64> { + // The thread command includes a `flavor` value which distinguishes between related thread + // states. However, `dyld` ignores this entirely, blindly interpreting the thread state + // values as a machine-specific set of registers matching the build configuration of the + // active `dyld` binary. + // + // Really the only thing that `dyld` cares about is the Mach header's `cputype`, so that's + // what we use here. 
+ match cputype { + super::cputype::CPU_TYPE_X86 => { + // struct i386_thread_state_t { + // uint32_t eax; + // uint32_t ebx; + // uint32_t ecx; + // uint32_t edx; + // uint32_t edi; + // uint32_t esi; + // uint32_t ebp; + // uint32_t esp; + // uint32_t ss; + // uint32_t eflags; + // uint32_t eip; + // uint32_t cs; + // uint32_t ds; + // uint32_t es; + // uint32_t fs; + // uint32_t gs; + // } + let eip: u32 = self.thread_state[10]; + Ok(u64::from(eip)) + }, + super::cputype::CPU_TYPE_X86_64 => { + // struct x86_thread_state64_t { + // uint64_t rax; + // uint64_t rbx; + // uint64_t rcx; + // uint64_t rdx; + // uint64_t rdi; + // uint64_t rsi; + // uint64_t rbp; + // uint64_t rsp; + // uint64_t r8; + // uint64_t r9; + // uint64_t r10; + // uint64_t r11; + // uint64_t r12; + // uint64_t r13; + // uint64_t r14; + // uint64_t r15; + // uint64_t rip; + // uint64_t rflags; + // uint64_t cs; + // uint64_t fs; + // uint64_t gs; + // } + let rip: u64 = + (u64::from(self.thread_state[32])) + | ((u64::from(self.thread_state[33])) << 32); + Ok(rip) + } + super::cputype::CPU_TYPE_ARM => { + // struct arm_thread_state32_t { + // uint32_t r[13]; + // uint32_t sp; + // uint32_t lr; + // uint32_t pc; + // uint32_t cpsr; + // } + let pc: u32 = self.thread_state[15]; + Ok(u64::from(pc)) + } + super::cputype::CPU_TYPE_ARM64 | super::cputype::CPU_TYPE_ARM64_32 => { + // struct arm_thread_state64_t { + // uint64_t x[29]; + // uint64_t fp; + // uint64_t lr; + // uint64_t sp; + // uint64_t pc; + // uint32_t cpsr; + // uint32_t pad; + // } + let pc: u64 = + (u64::from(self.thread_state[64])) + | ((u64::from(self.thread_state[65])) << 32); + Ok(pc) + } + // https://github.com/m4b/goblin/issues/64 + // Probably a G4 + super::cputype::CPU_TYPE_POWERPC => { + Ok(u64::from(self.thread_state[0])) + }, + // I think the G5 was the last motorola powerpc processor used by apple before switching to intel cpus. 
+ // unfortunately I don't have any binaries on hand to see what its thread state looks like :/ + // super::cputype::CPU_TYPE_POWERPC64 => { + // } + // Assuming above is added, I don't believe apple ever ported mach-o the mach kernel + // (and hence its binary format) to any other machines except the above, + // but I would be happy to learn otherwise + _ => { + Err(error::Error::Malformed(format!("unable to find instruction pointer for cputype {:?}", cputype))) + } + } + } +} + +/// Parses a `ThreadCommand` from the start of `bytes`: the load command header, then `flavor` at offset 8, `count` at offset 12 and `count` 32-bit words of thread state from offset 16. +/// +/// Words beyond `count` are left as 0. The reported size is the header's `cmdsize`. +/// +/// # Errors +/// Returns `Error::Malformed` when `count` exceeds the 70 words `thread_state` can hold, or when `bytes` is too short to contain `count` words; read failures from `pread_with` are propagated. +impl<'a> ctx::TryFromCtx<'a, Endian> for ThreadCommand { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], le: Endian) -> error::Result<(Self, usize)> { + let lc = bytes.pread_with::<LoadCommandHeader>(0, le)?; + + // read the thread state flavor and length of the thread state + let flavor: u32 = bytes.pread_with(8, le)?; + let count: u32 = bytes.pread_with(12, le)?; + + // bound `count` before using it: `thread_state` holds 70 words, and a bounded count + // keeps the byte-length multiplication below from overflowing `usize` + if count > 70 { + return Err(error::Error::Malformed(format!("thread command specifies {} longs for thread state but we handle only 70", count))); + } + + // get a byte slice of the thread state; `get` rather than indexing so that a + // truncated command is reported as malformed instead of panicking + let thread_state_byte_length = count as usize * 4; + let thread_state_bytes = bytes.get(16..16 + thread_state_byte_length).ok_or_else(|| { + error::Error::Malformed(format!("thread command specifies {} bytes for thread state but has only {}", thread_state_byte_length, bytes.len().saturating_sub(16))) + })?; + + // read the thread state + let mut thread_state: [u32; 70] = [ 0; 70 ]; + for (i, state) in thread_state.iter_mut().enumerate().take(count as usize) { + *state = thread_state_bytes.pread_with(i*4, le)?; + } + + Ok((ThreadCommand{ + cmd: lc.cmd, + cmdsize: lc.cmdsize, + flavor, + count, + thread_state, + }, lc.cmdsize as _)) + } +} + +impl Clone for ThreadCommand { + fn clone(&self) -> Self { + *self + } +} + +impl fmt::Debug for ThreadCommand { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("ThreadCommand") 
+ .field("cmd", &self.cmd) + .field("cmdsize", &self.cmdsize) + .field("flavor", &self.flavor) + .field("count", &self.count) + .field("thread_state", &&self.thread_state[..]) + .finish() + } +} + +/// The routines command contains the address of the dynamic shared library +/// initialization routine and an index into the module table for the module +/// that defines the routine. Before any modules are used from the library the +/// dynamic linker fully binds the module that defines the initialization routine +/// and then calls it. This gets called before any module initialization +/// routines (used for C++ static constructors) in the library. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct RoutinesCommand32 { + /// LC_ROUTINES + pub cmd: u32, + /// total size of this command + pub cmdsize: u32, + /// address of initialization routine + pub init_address:u32, + /// index into the module table that the init routine is defined in + pub init_module: u32, + pub reserved1: u32, + pub reserved2: u32, + pub reserved3: u32, + pub reserved4: u32, + pub reserved5: u32, + pub reserved6: u32, +} + +/// The 64-bit routines command. Same use as above. 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct RoutinesCommand64 { + /// LC_ROUTINES_64 + pub cmd: u32, + /// total size of this command + pub cmdsize: u32, + /// address of initialization routine + pub init_address: u64, + /// index into the module table that the init routine is defined in 8 bytes each + pub init_module: u64, + pub reserved1: u64, + pub reserved2: u64, + pub reserved3: u64, + pub reserved4: u64, + pub reserved5: u64, + pub reserved6: u64, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SymtabCommand { + pub cmd: u32, + pub cmdsize: u32, + pub symoff: u32, + pub nsyms: u32, + pub stroff: u32, + pub strsize: u32, +} + +impl Default for SymtabCommand { + fn default() -> Self { + SymtabCommand { + cmd: LC_SYMTAB, + cmdsize: SIZEOF_SYMTAB_COMMAND as u32, + symoff: 0, + nsyms: 0, + stroff: 0, + strsize: 0, + } + } +} + +impl SymtabCommand { + pub fn new() -> Self { + Default::default() + } +} + +pub const SIZEOF_SYMTAB_COMMAND: usize = 24; + +/// This is the second set of the symbolic information which is used to support +/// the data structures for the dynamically link editor. +/// +/// The original set of symbolic information in the symtab_command which contains +/// the symbol and string tables must also be present when this load command is +/// present. When this load command is present the symbol table is organized +/// into three groups of symbols: +/// local symbols (static and debugging symbols) - grouped by module +/// defined external symbols - grouped by module (sorted by name if not lib) +/// undefined external symbols (sorted by name if MH_BINDATLOAD is not set, +/// and in order the were seen by the static +/// linker if MH_BINDATLOAD is set) +/// In this load command there are offsets and counts to each of the three groups +/// of symbols. 
+/// +/// This load command contains a the offsets and sizes of the following new +/// symbolic information tables: +/// table of contents +/// module table +/// reference symbol table +/// indirect symbol table +/// The first three tables above (the table of contents, module table and +/// reference symbol table) are only present if the file is a dynamically linked +/// shared library. For executable and object modules, which are files +/// containing only one module, the information that would be in these three +/// tables is determined as follows: +/// table of contents - the defined external symbols are sorted by name +/// module table - the file contains only one module so everything in the +/// file is part of the module. +/// reference symbol table - is the defined and undefined external symbols +/// +/// For dynamically linked shared library files this load command also contains +/// offsets and sizes to the pool of relocation entries for all sections +/// separated into two groups: +/// external relocation entries +/// local relocation entries +/// For executable and object modules the relocation entries continue to hang +/// off the section structures. 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DysymtabCommand { + pub cmd: u32, + pub cmdsize: u32, + /// index to local symbols + pub ilocalsym: u32, + /// number of local symbols + pub nlocalsym: u32, + /// index to externally defined symbols + pub iextdefsym: u32, + /// number of externally defined symbols + pub nextdefsym: u32, + /// index to undefined symbols + pub iundefsym: u32, + /// number of undefined symbols + pub nundefsym: u32, + /// file offset to table of contents + pub tocoff: u32, + /// number of entries in table of contents + pub ntoc: u32, + /// file offset to module table + pub modtaboff: u32, + /// number of module table entries + pub nmodtab: u32, + /// offset to referenced symbol table + pub extrefsymoff: u32, + /// number of referenced symbol table entries + pub nextrefsyms: u32, + /// file offset to the indirect symbol table + pub indirectsymoff: u32, + /// number of indirect symbol table entries + pub nindirectsyms: u32, + /// offset to external relocation entries + pub extreloff: u32, + /// number of external relocation entries + pub nextrel: u32, + /// offset to local relocation entries + pub locreloff: u32, + /// number of local relocation entries + pub nlocrel: u32, +} + +impl Default for DysymtabCommand { + fn default() -> Self { + DysymtabCommand { + cmd: LC_DYSYMTAB, + cmdsize: SIZEOF_DYSYMTAB_COMMAND as u32, + ilocalsym: 0, + nlocalsym: 0, + iextdefsym: 0, + nextdefsym: 0, + iundefsym: 0, + nundefsym: 0, + tocoff: 0, + ntoc: 0, + modtaboff: 0, + nmodtab: 0, + extrefsymoff: 0, + nextrefsyms: 0, + indirectsymoff: 0, + nindirectsyms: 0, + extreloff: 0, + nextrel: 0, + locreloff: 0, + nlocrel: 0, + } + } +} + +impl DysymtabCommand { + pub fn new() -> Self { + Default::default() + } +} + +pub const SIZEOF_DYSYMTAB_COMMAND: usize = 80; + +// TODO: unimplemented +/// a table of contents entry +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct 
DylibTableOfContents { + /// the defined external symbol (index into the symbol table) + pub symbol_index: u32, + /// index into the module table this symbol is defined in + pub module_index: u32, +} + +// TODO: unimplemented +/// a module table entry +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DylibModule { + /// the module name (index into string table) + pub module_name: u32, + ///index into externally defined symbols + pub iextdefsym: u32, + ///number of externally defined symbols + pub nextdefsym: u32, + /// index into reference symbol table + pub irefsym: u32, + ///number of reference symbol table entries + pub nrefsym: u32, + /// index into symbols for local symbols + pub ilocalsym: u32, + ///number of local symbols + pub nlocalsym: u32, + + /// index into external relocation entries + pub iextrel: u32, + /// number of external relocation entries + pub nextrel: u32, + + /// low 16 bits are the index into the init section, high 16 bits are the index into the term section + pub iinit_iterm: u32, + /// low 16 bits are the number of init section entries, high 16 bits are the number of term section entries + pub ninit_nterm: u32, + /// the (__OBJC,_module_info) section + pub objc_module_info_addr: u32, + /// the (__OBJC,__module_info) section + pub objc_module_info_size: u32, +} + +// TODO: unimplemented +/// a 64-bit module table entry +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DylibModule64 { + /// the module name (index into string table) + pub module_name: u32, + + /// index into externally defined symbols + pub iextdefsym: u32, + /// number of externally defined symbols + pub nextdefsym: u32, + /// index into reference symbol table + pub irefsym: u32, + /// number of reference symbol table entries + pub nrefsym: u32, + /// index into symbols for local symbols + pub ilocalsym: u32, + /// number of local symbols + pub nlocalsym: u32, + + /// index into 
external relocation entries + pub iextrel: u32, + /// number of external relocation entries + pub nextrel: u32, + + /// low 16 bits are the index into the init section, high 16 bits are the index into the term section + pub iinit_iterm: u32, + /// low 16 bits are the number of init section entries, high 16 bits are the number of term section entries + pub ninit_nterm: u32, + + /// size of the (__OBJC,__module_info) section + pub objc_module_info_size: u32, + /// address of the (__OBJC,__module_info) section + pub objc_module_info_addr: u64, +} + +/// The entries in the reference symbol table are used when loading the module +/// (both by the static and dynamic link editors) and if the module is unloaded +/// or replaced. Therefore all external symbols (defined and undefined) are +/// listed in the module's reference table. The flags describe the type of +/// reference that is being made. The constants for the flags are defined in +/// <mach-o/nlist.h> as they are also used for symbol table entries. +// NOTE(review): `isym` is declared as 24 *bytes* although its doc describes a 24-bit +// bit-field, and `flags` is a full u64, so this struct is 32 bytes wide. That looks +// inconsistent with a bit-field packed entry - confirm against mach-o/loader.h before +// reading reference tables with it. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DylibReference { + /// 24 bits bit-field index into the symbol table + pub isym: [u8; 24], + /// flags to indicate the type of reference + pub flags: u64, +} + +/// The twolevel_hints_command contains the offset and number of hints in the +/// two-level namespace lookup hints table. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct TwolevelHintsCommand { + /// LC_TWOLEVEL_HINTS + pub cmd: u32, + /// sizeof(struct twolevel_hints_command) + pub cmdsize: u32, + /// offset to the hint table + pub offset: u32, + /// number of hints in the hint table + pub nhints: u32, +} + +/// The entries in the two-level namespace lookup hints table are twolevel_hint +/// structs. These provide hints to the dynamic link editor where to start +/// looking for an undefined symbol in a two-level namespace image. 
The +/// isub_image field is an index into the sub-images (sub-frameworks and +/// sub-umbrellas list) that made up the two-level image that the undefined +/// symbol was found in when it was built by the static link editor. If +/// isub-image is 0 the symbol is expected to be defined in library and not +/// in the sub-images. If isub-image is non-zero it is an index into the array +/// of sub-images for the umbrella with the first index in the sub-images being +/// 1. The array of sub-images is the ordered list of sub-images of the umbrella +/// that would be searched for a symbol that has the umbrella recorded as its +/// primary library. The table of contents index is an index into the +/// library's table of contents. This is used as the starting point of the +/// binary search or a directed linear search. +// NOTE(review): `itoc` is declared as 24 *bytes* although its doc describes a 24-bit +// field, and `isub_image` is a full u64, so this struct is 32 bytes wide. That looks +// inconsistent with a bit-field packed hint entry - confirm against mach-o/loader.h +// before reading hint tables with it. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct TwolevelHint { + /// index into the sub images + pub isub_image: u64, + /// 24 bit field index into the table of contents + pub itoc: [u8; 24], +} + +/// The prebind_cksum_command contains the value of the original check sum for +/// prebound files or zero. When a prebound file is first created or modified +/// for other than updating its prebinding information the value of the check sum +/// is set to zero. When the file has its prebinding re-done and if the value of +/// the check sum is zero the original check sum is calculated and stored in +/// cksum field of this load command in the output file. If when the prebinding +/// is re-done and the cksum field is non-zero it is left unchanged from the +/// input file. 
+// TODO: unimplemented +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct PrebindCksumCommand { + /// LC_PREBIND_CKSUM + pub cmd: u32, + /// sizeof(struct prebind_cksum_command) + pub cmdsize: u32, + /// the check sum or zero + pub cksum: u32, +} + +/// The uuid load command contains a single 128-bit unique random number that +/// identifies an object produced by the static link editor. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct UuidCommand { + /// LC_UUID + pub cmd: u32, + /// sizeof(struct uuid_command) + pub cmdsize: u32, + /// 16 bytes the 128-bit uuid + pub uuid: [u8; 16], +} + +pub const SIZEOF_UUID_COMMAND: usize = 24; + +/// The rpath_command contains a path which at runtime should be added to +/// the current run path used to find @rpath prefixed dylibs. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct RpathCommand { + /// LC_RPATH + pub cmd: u32, + /// includes string + pub cmdsize: u32, + /// path to add to run path + pub path: LcStr, +} + +pub const SIZEOF_RPATH_COMMAND: usize = 12; + +/// The linkedit_data_command contains the offsets and sizes of a blob +/// of data in the __LINKEDIT segment. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct LinkeditDataCommand { + /// LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS, LC_DATA_IN_CODE, LC_DYLIB_CODE_SIGN_DRS or LC_LINKER_OPTIMIZATION_HINT. + pub cmd: u32, + /// sizeof(struct linkedit_data_command) + pub cmdsize: u32, + /// file offset of data in __LINKEDIT segment + pub dataoff: u32, + /// file size of data in __LINKEDIT segment + pub datasize: u32, +} + +pub const SIZEOF_LINKEDIT_DATA_COMMAND: usize = 16; + +/// The encryption_info_command contains the file offset and size of an +/// of an encrypted segment. 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct EncryptionInfoCommand32 { + /// LC_ENCRYPTION_INFO + pub cmd: u32, + /// sizeof(struct encryption_info_command) + pub cmdsize: u32, + /// file offset of encrypted range + pub cryptoff: u32, + /// file size of encrypted range + pub cryptsize: u32, + /// which encryption system, 0 means not-encrypted yet + pub cryptid: u32, +} + +pub const SIZEOF_ENCRYPTION_INFO_COMMAND_32: usize = 20; + +/// The encryption_info_command_64 contains the file offset and size of an +/// encrypted segment (for use in x86_64 targets). +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct EncryptionInfoCommand64 { + /// LC_ENCRYPTION_INFO_64 + pub cmd: u32, + /// sizeof(struct encryption_info_command_64) + pub cmdsize: u32, + /// file offset of encrypted range + pub cryptoff: u32, + /// file size of encrypted range + pub cryptsize: u32, + /// which encryption system, 0 means not-encrypted yet + pub cryptid: u32, + /// padding to make this struct's size a multiple of 8 bytes + pub pad: u32, +} + +pub const SIZEOF_ENCRYPTION_INFO_COMMAND_64: usize = 24; + +/// The version_min_command contains the min OS version on which this +/// binary was built to run. 
+/// +/// LC_VERSION_MIN_MACOSX or LC_VERSION_MIN_IPHONEOS +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct VersionMinCommand { + /// LC_VERSION_MIN_MACOSX or LC_VERSION_MIN_IPHONEOS + pub cmd: u32, + /// sizeof(struct version_min_command) + pub cmdsize: u32, + /// X.Y.Z is encoded in nibbles xxxx.yy.zz + pub version: u32, + /// X.Y.Z is encoded in nibbles xxxx.yy.zz + pub sdk: u32, +} + +impl VersionMinCommand { + /// Builds a command whose `cmd` is LC_VERSION_MIN_IPHONEOS when `is_ios` is true and + /// LC_VERSION_MIN_MACOSX otherwise; `version` and `sdk` both start out as 0. + pub fn new(is_ios: bool) -> Self { + VersionMinCommand { + cmd: if is_ios { LC_VERSION_MIN_IPHONEOS } else { LC_VERSION_MIN_MACOSX }, + cmdsize: SIZEOF_VERSION_MIN_COMMAND as u32, + version: 0, + sdk: 0, + } + } +} + +pub const SIZEOF_VERSION_MIN_COMMAND: usize = 16; + +/// The dyld_info_command contains the file offsets and sizes of the rebase, binding, +/// weak binding, lazy binding and export information. +#[repr(C)] +#[derive(Default, Debug, Clone, Copy, Pread, Pwrite, SizeWith)] +pub struct DyldInfoCommand { + /// LC_DYLD_INFO or LC_DYLD_INFO_ONLY + pub cmd: u32, + /// sizeof(struct dyld_info_command) + pub cmdsize: u32, + /// file offset to rebase info + pub rebase_off: u32, + /// size of rebase info + pub rebase_size: u32, + /// file offset to binding info + pub bind_off: u32, + /// size of binding info + pub bind_size: u32, + /// file offset to weak binding info + pub weak_bind_off: u32, + /// size of weak binding info + pub weak_bind_size: u32, + /// file offset to lazy binding info + pub lazy_bind_off: u32, + /// size of lazy binding info + pub lazy_bind_size: u32, + /// file offset to export info + pub export_off: u32, + /// size of export info + pub export_size: u32, +} + +/// Size in bytes of `DyldInfoCommand` (twelve `u32` fields). NOTE: the name says DYLIB, but the value belongs to the dyld info command. +pub const SIZEOF_DYLIB_INFO_COMMAND: usize = 48; + +/// The linker_option_command contains linker options embedded in object files. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct LinkerOptionCommand { + /// LC_LINKER_OPTION only used in MH_OBJECT filetypes + pub cmd: u32, + pub cmdsize: u32, + /// number of strings concatenation of zero terminated UTF8 strings. 
Zero filled at end to align + pub count: u32, +} + +pub const SIZEOF_LINKER_OPTION_COMMAND: usize = 12; + +/// The symseg_command contains the offset and size of the GNU style +/// symbol table information as described in the header file <symseg.h>. +/// The symbol roots of the symbol segments must also be aligned properly +/// in the file. So the requirement of keeping the offsets aligned to a +/// multiple of 4 bytes translates to the length field of the symbol +/// roots also being a multiple of a long. Also the padding must again be +/// zeroed. (THIS IS OBSOLETE and no longer supported). +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SymsegCommand { + /// LC_SYMSEG + pub cmd: u32, + /// sizeof(struct symseg_command) + pub cmdsize: u32, + /// symbol segment offset + pub offset: u32, + /// symbol segment size in bytes + pub size: u32, +} + +pub const SIZEOF_SYMSEG_COMMAND: usize = 16; + +/// The ident_command contains a free format string table following the +/// ident_command structure. The strings are null terminated and the size of +/// the command is padded out with zero bytes to a multiple of 4 bytes. +/// (THIS IS OBSOLETE and no longer supported). +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct IdentCommand { + /// LC_IDENT + pub cmd: u32, + /// strings that follow this command + pub cmdsize: u32, +} + +pub const SIZEOF_IDENT_COMMAND: usize = 8; + +/// The fvmfile_command contains a reference to a file to be loaded at the +/// specified virtual address. (Presently, this command is reserved for +/// internal use. The kernel ignores this command when loading a program into +/// memory). 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct FvmfileCommand { + /// LC_FVMFILE + pub cmd: u32, + /// includes pathname string + pub cmdsize: u32, + /// file's pathname + pub name: LcStr, + /// file's virtual address + pub header_addr: u32, +} + +pub const SIZEOF_FVMFILE_COMMAND: usize = 16; + +/// The entry_point_command is a replacement for thread_command. +/// It is used for main executables to specify the location (file offset) +/// of main(). If -stack_size was used at link time, the stacksize +/// field will contain the stack size needed for the main thread. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct EntryPointCommand { + /// LC_MAIN + pub cmd: u32, + pub cmdsize: u32, + /// uint64_t file __TEXT offset of main + pub entryoff: u64, + /// uint64_t if not zero, initial stack size + pub stacksize: u64, +} + +pub const SIZEOF_ENTRY_POINT_COMMAND: usize = 24; + +/// The source_version_command is an optional load command containing +/// the version of the sources used to build the binary. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SourceVersionCommand { + /// LC_SOURCE_VERSION + pub cmd: u32, + pub cmdsize: u32, + /// A.B.C.D.E packed as a24.b10.c10.d10.e10 + pub version: u64, +} + +/// The LC_DATA_IN_CODE load command uses a linkedit_data_command +/// to point to an array of data_in_code_entry entries. Each entry +/// describes a range of data in a code section. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DataInCodeEntry { + /// from mach_header to start of data range + pub offset: u32, + /// number of bytes in data range + pub length: u16, + /// a DICE_KIND_* value + pub kind: u16, +} + +/////////////////////////////////////// +// Constants, et. 
// al
///////////////////////////////////////

/// High bit that is OR-ed into the values of the commands below that are
/// written as `0x.. | LC_REQ_DYLD`.
pub const LC_REQ_DYLD: u32 = 0x8000_0000;

// Load command identifiers, listed in ascending order of their low bits.
pub const LC_SEGMENT: u32 = 0x1;
pub const LC_SYMTAB: u32 = 0x2;
pub const LC_SYMSEG: u32 = 0x3;
pub const LC_THREAD: u32 = 0x4;
pub const LC_UNIXTHREAD: u32 = 0x5;
pub const LC_LOADFVMLIB: u32 = 0x6;
pub const LC_IDFVMLIB: u32 = 0x7;
pub const LC_IDENT: u32 = 0x8;
pub const LC_FVMFILE: u32 = 0x9;
pub const LC_PREPAGE: u32 = 0xa;
pub const LC_DYSYMTAB: u32 = 0xb;
pub const LC_LOAD_DYLIB: u32 = 0xc;
pub const LC_ID_DYLIB: u32 = 0xd;
pub const LC_LOAD_DYLINKER: u32 = 0xe;
pub const LC_ID_DYLINKER: u32 = 0xf;
pub const LC_PREBOUND_DYLIB: u32 = 0x10;
pub const LC_ROUTINES: u32 = 0x11;
pub const LC_SUB_FRAMEWORK: u32 = 0x12;
pub const LC_SUB_UMBRELLA: u32 = 0x13;
pub const LC_SUB_CLIENT: u32 = 0x14;
pub const LC_SUB_LIBRARY: u32 = 0x15;
pub const LC_TWOLEVEL_HINTS: u32 = 0x16;
pub const LC_PREBIND_CKSUM: u32 = 0x17;
pub const LC_LOAD_WEAK_DYLIB: u32 = 0x18 | LC_REQ_DYLD;
pub const LC_SEGMENT_64: u32 = 0x19;
pub const LC_ROUTINES_64: u32 = 0x1a;
pub const LC_UUID: u32 = 0x1b;
pub const LC_RPATH: u32 = 0x1c | LC_REQ_DYLD;
pub const LC_CODE_SIGNATURE: u32 = 0x1d;
pub const LC_SEGMENT_SPLIT_INFO: u32 = 0x1e;
pub const LC_REEXPORT_DYLIB: u32 = 0x1f | LC_REQ_DYLD;
pub const LC_LAZY_LOAD_DYLIB: u32 = 0x20;
pub const LC_ENCRYPTION_INFO: u32 = 0x21;
pub const LC_DYLD_INFO: u32 = 0x22;
pub const LC_DYLD_INFO_ONLY: u32 = 0x22 | LC_REQ_DYLD;
pub const LC_LOAD_UPWARD_DYLIB: u32 = 0x23 | LC_REQ_DYLD;
pub const LC_VERSION_MIN_MACOSX: u32 = 0x24;
pub const LC_VERSION_MIN_IPHONEOS: u32 = 0x25;
pub const LC_FUNCTION_STARTS: u32 = 0x26;
pub const LC_DYLD_ENVIRONMENT: u32 = 0x27;
pub const LC_MAIN: u32 = 0x28 | LC_REQ_DYLD;
pub const LC_DATA_IN_CODE: u32 = 0x29;
pub const LC_SOURCE_VERSION: u32 = 0x2A;
pub const LC_DYLIB_CODE_SIGN_DRS: u32 = 0x2B;
pub const LC_ENCRYPTION_INFO_64: u32 = 0x2C;
pub const LC_LINKER_OPTION: u32 = 0x2D;
pub const LC_LINKER_OPTIMIZATION_HINT: u32 = 0x2E;

/// Returns the name of the `LC_*` constant whose value equals `cmd`, or
/// `"LC_UNKNOWN"` when `cmd` matches none of the constants above.
pub fn cmd_to_str(cmd: u32) -> &'static str {
    // Each arm maps a constant to its own identifier spelled out as a string,
    // so every name is written exactly once and a label cannot drift from it.
    macro_rules! name_of {
        ($value:expr; $($constant:ident),+) => {
            match $value {
                $($constant => stringify!($constant),)+
                _ => "LC_UNKNOWN",
            }
        };
    }

    name_of!(cmd;
        LC_SEGMENT,
        LC_SYMTAB,
        LC_SYMSEG,
        LC_THREAD,
        LC_UNIXTHREAD,
        LC_LOADFVMLIB,
        LC_IDFVMLIB,
        LC_IDENT,
        LC_FVMFILE,
        LC_PREPAGE,
        LC_DYSYMTAB,
        LC_LOAD_DYLIB,
        LC_ID_DYLIB,
        LC_LOAD_DYLINKER,
        LC_ID_DYLINKER,
        LC_PREBOUND_DYLIB,
        LC_ROUTINES,
        LC_SUB_FRAMEWORK,
        LC_SUB_UMBRELLA,
        LC_SUB_CLIENT,
        LC_SUB_LIBRARY,
        LC_TWOLEVEL_HINTS,
        LC_PREBIND_CKSUM,
        LC_LOAD_WEAK_DYLIB,
        LC_SEGMENT_64,
        LC_ROUTINES_64,
        LC_UUID,
        LC_RPATH,
        LC_CODE_SIGNATURE,
        LC_SEGMENT_SPLIT_INFO,
        LC_REEXPORT_DYLIB,
        LC_LAZY_LOAD_DYLIB,
        LC_ENCRYPTION_INFO,
        LC_DYLD_INFO,
        LC_DYLD_INFO_ONLY,
        LC_LOAD_UPWARD_DYLIB,
        LC_VERSION_MIN_MACOSX,
        LC_VERSION_MIN_IPHONEOS,
        LC_FUNCTION_STARTS,
        LC_DYLD_ENVIRONMENT,
        LC_MAIN,
        LC_DATA_IN_CODE,
        LC_SOURCE_VERSION,
        LC_DYLIB_CODE_SIGN_DRS,
        LC_ENCRYPTION_INFO_64,
        LC_LINKER_OPTION,
        LC_LINKER_OPTIMIZATION_HINT
    )
}
+/////////////////////////////////////////// +// Typesafe Command Variants +/////////////////////////////////////////// + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +/// The various load commands as a cast-free variant/enum +pub enum CommandVariant { + Segment32 (SegmentCommand32), + Segment64 (SegmentCommand64), + Uuid (UuidCommand), + Symtab (SymtabCommand), + Symseg (SymsegCommand), + Thread (ThreadCommand), + Unixthread (ThreadCommand), + LoadFvmlib (FvmlibCommand), + IdFvmlib (FvmlibCommand), + Ident (IdentCommand), + Fvmfile (FvmfileCommand), + Prepage (LoadCommandHeader), + Dysymtab (DysymtabCommand), + LoadDylib (DylibCommand), + IdDylib (DylibCommand), + LoadDylinker (DylinkerCommand), + IdDylinker (DylinkerCommand), + PreboundDylib (PreboundDylibCommand), + Routines32 (RoutinesCommand32), + Routines64 (RoutinesCommand64), + SubFramework (SubFrameworkCommand), + SubUmbrella (SubUmbrellaCommand), + SubClient (SubClientCommand), + SubLibrary (SubLibraryCommand), + TwolevelHints (TwolevelHintsCommand), + PrebindCksum (PrebindCksumCommand), + LoadWeakDylib (DylibCommand), + Rpath (RpathCommand), + CodeSignature (LinkeditDataCommand), + SegmentSplitInfo (LinkeditDataCommand), + ReexportDylib (DylibCommand), + LazyLoadDylib (DylibCommand), + EncryptionInfo32 (EncryptionInfoCommand32), + EncryptionInfo64 (EncryptionInfoCommand64), + DyldInfo (DyldInfoCommand), + DyldInfoOnly (DyldInfoCommand), + LoadUpwardDylib (DylibCommand), + VersionMinMacosx (VersionMinCommand), + VersionMinIphoneos (VersionMinCommand), + FunctionStarts (LinkeditDataCommand), + DyldEnvironment (DylinkerCommand), + Main (EntryPointCommand), + DataInCode (LinkeditDataCommand), + SourceVersion (SourceVersionCommand), + DylibCodeSignDrs (LinkeditDataCommand), + LinkerOption (LinkeditDataCommand), + LinkerOptimizationHint (LinkeditDataCommand), + Unimplemented (LoadCommandHeader), +} + +impl<'a> ctx::TryFromCtx<'a, Endian> for CommandVariant { + type Error = crate::error::Error; + fn 
try_from_ctx(bytes: &'a [u8], le: Endian) -> error::Result<(Self, usize)> { + use self::CommandVariant::*; + let lc = bytes.pread_with::<LoadCommandHeader>(0, le)?; + let size = lc.cmdsize as usize; + //println!("offset {:#x} cmd: {:#x} size: {:?} ctx: {:?}", offset, lc.cmd, size, le); + if size > bytes.len() { return Err(error::Error::Malformed(format!("{} has size larger than remainder of binary: {:?}", &lc, bytes.len()))) } + match lc.cmd { + LC_SEGMENT => { let comm = bytes.pread_with::<SegmentCommand32> (0, le)?; Ok((Segment32 (comm), size))}, + LC_SEGMENT_64 => { let comm = bytes.pread_with::<SegmentCommand64> (0, le)?; Ok((Segment64 (comm), size))}, + LC_DYSYMTAB => { let comm = bytes.pread_with::<DysymtabCommand> (0, le)?; Ok((Dysymtab (comm), size))}, + LC_LOAD_DYLINKER => { let comm = bytes.pread_with::<DylinkerCommand> (0, le)?; Ok((LoadDylinker (comm), size))}, + LC_ID_DYLINKER => { let comm = bytes.pread_with::<DylinkerCommand> (0, le)?; Ok((IdDylinker (comm), size))}, + LC_UUID => { let comm = bytes.pread_with::<UuidCommand> (0, le)?; Ok((Uuid (comm), size))}, + LC_SYMTAB => { let comm = bytes.pread_with::<SymtabCommand> (0, le)?; Ok((Symtab (comm), size))}, + LC_SYMSEG => { let comm = bytes.pread_with::<SymsegCommand> (0, le)?; Ok((Symseg (comm), size))}, + LC_THREAD => { let comm = bytes.pread_with::<ThreadCommand> (0, le)?; Ok((Thread (comm), size))}, + LC_UNIXTHREAD => { let comm = bytes.pread_with::<ThreadCommand> (0, le)?; Ok((Unixthread (comm), size))}, + LC_LOADFVMLIB => { let comm = bytes.pread_with::<FvmlibCommand> (0, le)?; Ok((LoadFvmlib (comm), size))}, + LC_IDFVMLIB => { let comm = bytes.pread_with::<FvmlibCommand> (0, le)?; Ok((IdFvmlib (comm), size))}, + LC_IDENT => { let comm = bytes.pread_with::<IdentCommand> (0, le)?; Ok((Ident (comm), size))}, + LC_FVMFILE => { let comm = bytes.pread_with::<FvmfileCommand> (0, le)?; Ok((Fvmfile (comm), size))}, + LC_PREPAGE => { let comm = bytes.pread_with::<LoadCommandHeader> (0, le)?; Ok((Prepage 
(comm), size))}, + LC_LOAD_DYLIB => { let comm = bytes.pread_with::<DylibCommand> (0, le)?; Ok((LoadDylib (comm), size))}, + LC_ID_DYLIB => { let comm = bytes.pread_with::<DylibCommand> (0, le)?; Ok((IdDylib (comm), size))}, + LC_PREBOUND_DYLIB => { let comm = bytes.pread_with::<PreboundDylibCommand> (0, le)?; Ok((PreboundDylib (comm), size))}, + LC_ROUTINES => { let comm = bytes.pread_with::<RoutinesCommand32> (0, le)?; Ok((Routines32 (comm), size))}, + LC_ROUTINES_64 => { let comm = bytes.pread_with::<RoutinesCommand64> (0, le)?; Ok((Routines64 (comm), size))}, + LC_SUB_FRAMEWORK => { let comm = bytes.pread_with::<SubFrameworkCommand> (0, le)?; Ok((SubFramework (comm), size))}, + LC_SUB_UMBRELLA => { let comm = bytes.pread_with::<SubUmbrellaCommand> (0, le)?; Ok((SubUmbrella (comm), size))}, + LC_SUB_CLIENT => { let comm = bytes.pread_with::<SubClientCommand> (0, le)?; Ok((SubClient (comm), size))}, + LC_SUB_LIBRARY => { let comm = bytes.pread_with::<SubLibraryCommand> (0, le)?; Ok((SubLibrary (comm), size))}, + LC_TWOLEVEL_HINTS => { let comm = bytes.pread_with::<TwolevelHintsCommand> (0, le)?; Ok((TwolevelHints (comm), size))}, + LC_PREBIND_CKSUM => { let comm = bytes.pread_with::<PrebindCksumCommand> (0, le)?; Ok((PrebindCksum (comm), size))}, + LC_LOAD_WEAK_DYLIB => { let comm = bytes.pread_with::<DylibCommand> (0, le)?; Ok((LoadWeakDylib (comm), size))}, + LC_RPATH => { let comm = bytes.pread_with::<RpathCommand> (0, le)?; Ok((Rpath (comm), size))}, + LC_CODE_SIGNATURE => { let comm = bytes.pread_with::<LinkeditDataCommand> (0, le)?; Ok((CodeSignature (comm), size))}, + LC_SEGMENT_SPLIT_INFO => { let comm = bytes.pread_with::<LinkeditDataCommand> (0, le)?; Ok((SegmentSplitInfo (comm), size))}, + LC_REEXPORT_DYLIB => { let comm = bytes.pread_with::<DylibCommand> (0, le)?; Ok((ReexportDylib (comm), size))}, + LC_LAZY_LOAD_DYLIB => { let comm = bytes.pread_with::<DylibCommand> (0, le)?; Ok((LazyLoadDylib (comm), size))}, + LC_ENCRYPTION_INFO => { let comm = 
bytes.pread_with::<EncryptionInfoCommand32>(0, le)?; Ok((EncryptionInfo32 (comm), size))}, + LC_ENCRYPTION_INFO_64 => { let comm = bytes.pread_with::<EncryptionInfoCommand64>(0, le)?; Ok((EncryptionInfo64 (comm), size))}, + LC_DYLD_INFO => { let comm = bytes.pread_with::<DyldInfoCommand> (0, le)?; Ok((DyldInfo (comm), size))}, + LC_DYLD_INFO_ONLY => { let comm = bytes.pread_with::<DyldInfoCommand> (0, le)?; Ok((DyldInfoOnly (comm), size))}, + LC_LOAD_UPWARD_DYLIB => { let comm = bytes.pread_with::<DylibCommand> (0, le)?; Ok((LoadUpwardDylib (comm), size))}, + LC_VERSION_MIN_MACOSX => { let comm = bytes.pread_with::<VersionMinCommand> (0, le)?; Ok((VersionMinMacosx (comm), size))}, + LC_VERSION_MIN_IPHONEOS => { let comm = bytes.pread_with::<VersionMinCommand> (0, le)?; Ok((VersionMinIphoneos (comm), size))}, + LC_FUNCTION_STARTS => { let comm = bytes.pread_with::<LinkeditDataCommand> (0, le)?; Ok((FunctionStarts (comm), size))}, + LC_DYLD_ENVIRONMENT => { let comm = bytes.pread_with::<DylinkerCommand> (0, le)?; Ok((DyldEnvironment (comm), size))}, + LC_MAIN => { let comm = bytes.pread_with::<EntryPointCommand> (0, le)?; Ok((Main (comm), size))}, + LC_DATA_IN_CODE => { let comm = bytes.pread_with::<LinkeditDataCommand> (0, le)?; Ok((DataInCode (comm), size))}, + LC_SOURCE_VERSION => { let comm = bytes.pread_with::<SourceVersionCommand> (0, le)?; Ok((SourceVersion (comm), size))}, + LC_DYLIB_CODE_SIGN_DRS => { let comm = bytes.pread_with::<LinkeditDataCommand> (0, le)?; Ok((DylibCodeSignDrs (comm), size))}, + LC_LINKER_OPTION => { let comm = bytes.pread_with::<LinkeditDataCommand> (0, le)?; Ok((LinkerOption (comm), size))}, + LC_LINKER_OPTIMIZATION_HINT => {let comm = bytes.pread_with::<LinkeditDataCommand> (0, le)?; Ok((LinkerOptimizationHint (comm), size))}, + _ => Ok((Unimplemented (lc), size)), + } + } +} + +impl CommandVariant { + pub fn cmdsize(&self) -> usize { + use self::CommandVariant::*; + let cmdsize = match *self { + Segment32 (comm) => comm.cmdsize, + 
Segment64 (comm) => comm.cmdsize, + Uuid (comm) => comm.cmdsize, + Symtab (comm) => comm.cmdsize, + Symseg (comm) => comm.cmdsize, + Thread (comm) => comm.cmdsize, + Unixthread (comm) => comm.cmdsize, + LoadFvmlib (comm) => comm.cmdsize, + IdFvmlib (comm) => comm.cmdsize, + Ident (comm) => comm.cmdsize, + Fvmfile (comm) => comm.cmdsize, + Prepage (comm) => comm.cmdsize, + Dysymtab (comm) => comm.cmdsize, + LoadDylib (comm) => comm.cmdsize, + IdDylib (comm) => comm.cmdsize, + LoadDylinker (comm) => comm.cmdsize, + IdDylinker (comm) => comm.cmdsize, + PreboundDylib (comm) => comm.cmdsize, + Routines32 (comm) => comm.cmdsize, + Routines64 (comm) => comm.cmdsize, + SubFramework (comm) => comm.cmdsize, + SubUmbrella (comm) => comm.cmdsize, + SubClient (comm) => comm.cmdsize, + SubLibrary (comm) => comm.cmdsize, + TwolevelHints (comm) => comm.cmdsize, + PrebindCksum (comm) => comm.cmdsize, + LoadWeakDylib (comm) => comm.cmdsize, + Rpath (comm) => comm.cmdsize, + CodeSignature (comm) => comm.cmdsize, + SegmentSplitInfo (comm) => comm.cmdsize, + ReexportDylib (comm) => comm.cmdsize, + LazyLoadDylib (comm) => comm.cmdsize, + EncryptionInfo32 (comm) => comm.cmdsize, + EncryptionInfo64 (comm) => comm.cmdsize, + DyldInfo (comm) => comm.cmdsize, + DyldInfoOnly (comm) => comm.cmdsize, + LoadUpwardDylib (comm) => comm.cmdsize, + VersionMinMacosx (comm) => comm.cmdsize, + VersionMinIphoneos (comm) => comm.cmdsize, + FunctionStarts (comm) => comm.cmdsize, + DyldEnvironment (comm) => comm.cmdsize, + Main (comm) => comm.cmdsize, + DataInCode (comm) => comm.cmdsize, + SourceVersion (comm) => comm.cmdsize, + DylibCodeSignDrs (comm) => comm.cmdsize, + LinkerOption (comm) => comm.cmdsize, + LinkerOptimizationHint (comm) => comm.cmdsize, + Unimplemented (comm) => comm.cmdsize, + }; + cmdsize as usize + } + pub fn cmd(&self) -> u32 { + use self::CommandVariant::*; + match *self { + Segment32 (comm) => comm.cmd, + Segment64 (comm) => comm.cmd, + Uuid (comm) => comm.cmd, + Symtab (comm) => 
comm.cmd, + Symseg (comm) => comm.cmd, + Thread (comm) => comm.cmd, + Unixthread (comm) => comm.cmd, + LoadFvmlib (comm) => comm.cmd, + IdFvmlib (comm) => comm.cmd, + Ident (comm) => comm.cmd, + Fvmfile (comm) => comm.cmd, + Prepage (comm) => comm.cmd, + Dysymtab (comm) => comm.cmd, + LoadDylib (comm) => comm.cmd, + IdDylib (comm) => comm.cmd, + LoadDylinker (comm) => comm.cmd, + IdDylinker (comm) => comm.cmd, + PreboundDylib (comm) => comm.cmd, + Routines32 (comm) => comm.cmd, + Routines64 (comm) => comm.cmd, + SubFramework (comm) => comm.cmd, + SubUmbrella (comm) => comm.cmd, + SubClient (comm) => comm.cmd, + SubLibrary (comm) => comm.cmd, + TwolevelHints (comm) => comm.cmd, + PrebindCksum (comm) => comm.cmd, + LoadWeakDylib (comm) => comm.cmd, + Rpath (comm) => comm.cmd, + CodeSignature (comm) => comm.cmd, + SegmentSplitInfo (comm) => comm.cmd, + ReexportDylib (comm) => comm.cmd, + LazyLoadDylib (comm) => comm.cmd, + EncryptionInfo32 (comm) => comm.cmd, + EncryptionInfo64 (comm) => comm.cmd, + DyldInfo (comm) => comm.cmd, + DyldInfoOnly (comm) => comm.cmd, + LoadUpwardDylib (comm) => comm.cmd, + VersionMinMacosx (comm) => comm.cmd, + VersionMinIphoneos (comm) => comm.cmd, + FunctionStarts (comm) => comm.cmd, + DyldEnvironment (comm) => comm.cmd, + Main (comm) => comm.cmd, + DataInCode (comm) => comm.cmd, + SourceVersion (comm) => comm.cmd, + DylibCodeSignDrs (comm) => comm.cmd, + LinkerOption (comm) => comm.cmd, + LinkerOptimizationHint (comm) => comm.cmd, + Unimplemented (comm) => comm.cmd, + } + } +} + +#[derive(Debug)] +/// A tagged LoadCommand union +pub struct LoadCommand { + /// The offset this load command occurs at + pub offset: usize, + /// Which load command this is inside a variant + pub command: CommandVariant, +} + +impl LoadCommand { + /// Parse a load command from `bytes` at `offset` with the `le` endianness + pub fn parse(bytes: &[u8], offset: &mut usize, le: scroll::Endian) -> error::Result<Self> { + let start = *offset; + let command = 
bytes.pread_with::<CommandVariant>(start, le)?; + let size = command.cmdsize(); + *offset = start + size; + Ok(LoadCommand { offset: start, command }) + } +} diff --git a/third_party/rust/goblin/src/mach/mod.rs b/third_party/rust/goblin/src/mach/mod.rs new file mode 100644 index 0000000000..26a84afc54 --- /dev/null +++ b/third_party/rust/goblin/src/mach/mod.rs @@ -0,0 +1,413 @@ +//! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions +use core::fmt; +use crate::alloc::vec::Vec; + +use log::debug; + +use scroll::{Pread, BE}; +use scroll::ctx::SizeWith; + +use crate::error; +use crate::container; + +pub mod header; +pub mod constants; +pub mod fat; +pub mod load_command; +pub mod symbols; +pub mod exports; +pub mod imports; +pub mod bind_opcodes; +pub mod relocation; +pub mod segment; + +pub use self::constants::cputype as cputype; + +/// Returns a big endian magical number +pub fn peek(bytes: &[u8], offset: usize) -> error::Result<u32> { + Ok(bytes.pread_with::<u32>(offset, scroll::BE)?) +} + +/// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number. 
+pub fn parse_magic_and_ctx(bytes: &[u8], offset: usize) -> error::Result<(u32, Option<container::Ctx>)> { + use crate::mach::header::*; + use crate::container::Container; + let magic = bytes.pread_with::<u32>(offset, BE)?; + let ctx = match magic { + MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => { + let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64; + let le = scroll::Endian::from(is_lsb); + let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 { Container::Big } else { Container::Little }; + Some(container::Ctx::new(container, le)) + }, + _ => None, + }; + Ok((magic, ctx)) +} + +/// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser +pub struct MachO<'a> { + /// The mach-o header + pub header: header::Header, + /// The load commands tell the kernel and dynamic linker how to use/interpret this binary + pub load_commands: Vec<load_command::LoadCommand>, + /// The load command "segments" - typically the pieces of the binary that are loaded into memory + pub segments: segment::Segments<'a>, + /// The "Nlist" style symbols in this binary - strippable + pub symbols: Option<symbols::Symbols<'a>>, + /// The dylibs this library depends on + pub libs: Vec<&'a str>, + /// The entry point (as a virtual memory address), 0 if none + pub entry: u64, + /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint + pub old_style_entry: bool, + /// The name of the dylib, if any + pub name: Option<&'a str>, + /// Are we a little-endian binary? 
+ pub little_endian: bool, + /// Are we a 64-bit binary + pub is_64: bool, + data: &'a [u8], + ctx: container::Ctx, + export_trie: Option<exports::ExportTrie<'a>>, + bind_interpreter: Option<imports::BindInterpreter<'a>>, +} + +impl<'a> fmt::Debug for MachO<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("MachO") + .field("header", &self.header) + .field("load_commands", &self.load_commands) + .field("segments", &self.segments) + .field("entry", &self.entry) + .field("old_style_entry", &self.old_style_entry) + .field("libs", &self.libs) + .field("name", &self.name) + .field("little_endian", &self.little_endian) + .field("is_64", &self.is_64) + .field("symbols()", &self.symbols().collect::<Vec<_>>()) + .field("exports()", &self.exports()) + .field("imports()", &self.imports()) + .finish() + } +} + +impl<'a> MachO<'a> { + /// Is this a relocatable object file? + pub fn is_object_file(&self) -> bool { + self.header.filetype == header::MH_OBJECT + } + /// Return an iterator over all the symbols in this binary + pub fn symbols(&self) -> symbols::SymbolIterator<'a> { + if let Some(ref symbols) = self.symbols { + symbols.into_iter() + } else { + symbols::SymbolIterator::default() + } + } + /// Return a vector of the relocations in this binary + pub fn relocations(&self) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>> { + debug!("Iterating relocations"); + let mut relocs = Vec::new(); + for (_i, segment) in (&self.segments).into_iter().enumerate() { + for (j, section) in segment.into_iter().enumerate() { + let (section, _data) = section?; + if section.nreloc > 0 { + relocs.push((j, section.iter_relocations(self.data, self.ctx), section)); + } + } + } + Ok(relocs) + } + /// Return the exported symbols in this binary (if any) + pub fn exports(&self) -> error::Result<Vec<exports::Export>> { + if let Some(ref trie) = self.export_trie { + trie.exports(self.libs.as_slice()) + } else { + Ok(vec![]) + } + } + /// 
Return the imported symbols in this binary that dyld knows about (if any) + pub fn imports(&self) -> error::Result<Vec<imports::Import>> { + if let Some(ref interpreter) = self.bind_interpreter { + interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx) + } else { + Ok(vec![]) + } + } + /// Parses the Mach-o binary from `bytes` at `offset` + pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>> { + let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?; + let ctx = if let Some(ctx) = maybe_ctx { ctx } else { return Err(error::Error::BadMagic(u64::from(magic))) }; + debug!("Ctx: {:?}", ctx); + let offset = &mut offset; + let header: header::Header = bytes.pread_with(*offset, ctx)?; + debug!("Mach-o header: {:?}", header); + let little_endian = ctx.le.is_little(); + let is_64 = ctx.container.is_big(); + *offset += header::Header::size_with(&ctx.container); + let ncmds = header.ncmds; + let mut cmds: Vec<load_command::LoadCommand> = Vec::with_capacity(ncmds); + let mut symbols = None; + let mut libs = vec!["self"]; + let mut export_trie = None; + let mut bind_interpreter = None; + let mut unixthread_entry_address = None; + let mut main_entry_offset = None; + let mut name = None; + let mut segments = segment::Segments::new(ctx); + for i in 0..ncmds { + let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?; + debug!("{} - {:?}", i, cmd); + match cmd.command { + load_command::CommandVariant::Segment32(command) => { + // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue? + segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?) + }, + load_command::CommandVariant::Segment64(command) => { + segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?) 
+ }, + load_command::CommandVariant::Symtab(command) => { + symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?); + }, + load_command::CommandVariant::LoadDylib (command) + | load_command::CommandVariant::LoadUpwardDylib(command) + | load_command::CommandVariant::ReexportDylib (command) + | load_command::CommandVariant::LoadWeakDylib (command) + | load_command::CommandVariant::LazyLoadDylib (command) => { + let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; + libs.push(lib); + }, + load_command::CommandVariant::DyldInfo (command) + | load_command::CommandVariant::DyldInfoOnly(command) => { + export_trie = Some(exports::ExportTrie::new(bytes, &command)); + bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command)); + }, + load_command::CommandVariant::Unixthread(command) => { + // dyld cares only about the first LC_UNIXTHREAD + if unixthread_entry_address.is_none() { + unixthread_entry_address = Some(command.instruction_pointer(header.cputype)?); + } + }, + load_command::CommandVariant::Main(command) => { + // dyld cares only about the first LC_MAIN + if main_entry_offset.is_none() { + main_entry_offset = Some(command.entryoff); + } + }, + load_command::CommandVariant::IdDylib(command) => { + let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; + libs[0] = id; + name = Some(id); + }, + _ => () + } + cmds.push(cmd) + } + + // dyld prefers LC_MAIN over LC_UNIXTHREAD + // choose the same way here + let (entry, old_style_entry) = if let Some(offset) = main_entry_offset { + // map the entrypoint offset to a virtual memory address + let base_address = segments.iter() + .filter(|s| &s.segname[0..7] == b"__TEXT\0") + .map(|s| s.vmaddr - s.fileoff) + .next() + .ok_or_else(|| + error::Error::Malformed(format!("image specifies LC_MAIN offset {} but has no __TEXT segment", offset)) + )?; + + (base_address + offset, false) + } else if let Some(address) = unixthread_entry_address { + (address, true) + } else { + 
(0, false)
        };

        Ok(MachO {
            header,
            load_commands: cmds,
            segments,
            symbols,
            libs,
            export_trie,
            bind_interpreter,
            entry,
            old_style_entry,
            name,
            ctx,
            is_64,
            little_endian,
            data: bytes,
        })
    }
}

/// A Mach-o multi architecture (Fat) binary container
///
/// Holds the raw bytes of the whole container plus the offset (`start`) at which
/// the table of fat arch headers begins; nothing is parsed until it is asked for.
pub struct MultiArch<'a> {
    data: &'a [u8],
    start: usize,
    /// Number of architectures, as read from the fat header (not validated against `data`)
    pub narches: usize,
}

/// Iterator over the fat architecture headers in a `MultiArch` container
pub struct FatArchIterator<'a> {
    index: usize,
    data: &'a[u8],
    narches: usize,
    start: usize,
}

impl<'a> Iterator for FatArchIterator<'a> {
    type Item = error::Result<fat::FatArch>;
    /// Reads the next big-endian `FatArch` header; a failed read is yielded as `Err`
    /// and iteration still advances to the following slot.
    fn next(&mut self) -> Option<Self::Item> {
        if self.index >= self.narches {
            None
        } else {
            let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start;
            let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE).map_err(core::convert::Into::into);
            self.index += 1;
            Some(arch)
        }
    }
}

/// Iterator over every `MachO` binary contained in this `MultiArch` container
pub struct MachOIterator<'a> {
    index: usize,
    data: &'a[u8],
    narches: usize,
    start: usize,
}

impl<'a> Iterator for MachOIterator<'a> {
    type Item = error::Result<MachO<'a>>;
    /// Reads the next fat arch header and parses the binary it points at.
    fn next(&mut self) -> Option<Self::Item> {
        if self.index >= self.narches {
            None
        } else {
            let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start;
            self.index += 1;
            match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) {
                Ok(arch) => {
                    // NOTE(review): `FatArch::slice` lives in fat.rs, outside this view;
                    // confirm it cannot panic on an out-of-range offset/size.
                    let bytes = arch.slice(self.data);
                    Some(MachO::parse(bytes, 0))
                },
                Err(e) => Some(Err(e.into()))
            }
        }
    }
}

impl<'a, 'b> IntoIterator for &'b MultiArch<'a> {
    type Item = error::Result<MachO<'a>>;
    type IntoIter = MachOIterator<'a>;
    fn into_iter(self) -> Self::IntoIter {
        MachOIterator {
            index: 0,
            data: self.data,
            narches: self.narches,
            start: self.start,
        }
    }
}

impl<'a> MultiArch<'a> {
    /// Lazily construct `Self`
    ///
    /// Only the fat header is parsed here; the per-architecture headers and the
    /// binaries themselves are read on demand.
    pub fn new(bytes: &'a [u8]) -> error::Result<Self> {
        let header = fat::FatHeader::parse(bytes)?;
        Ok(MultiArch {
            data: bytes,
            start: fat::SIZEOF_FAT_HEADER,
            narches: header.nfat_arch as usize
        })
    }
    /// Iterate every fat arch header
    ///
    /// The iterator borrows the underlying bytes (`'a`), not `self`.
    pub fn iter_arches(&self) -> FatArchIterator<'a> {
        FatArchIterator {
            index: 0,
            data: self.data,
            narches: self.narches,
            start: self.start,
        }
    }
    /// Return all the architectures in this binary
    ///
    /// # Errors
    /// Returns the first error hit while reading a fat arch header.
    pub fn arches(&self) -> error::Result<Vec<fat::FatArch>> {
        // `narches` comes straight from the file header, so it is not used to
        // preallocate: a malformed count must not trigger a huge allocation.
        self.iter_arches().collect()
    }
    /// Try to get the Mach-o binary at `index`
    ///
    /// # Errors
    /// `Malformed` if `index` is not smaller than `narches`; otherwise any error
    /// from reading the arch header or parsing the binary.
    pub fn get(&self, index: usize) -> error::Result<MachO<'a>> {
        if index >= self.narches {
            return Err(error::Error::Malformed(format!("Requested the {}-th binary, but there are only {} architectures in this container", index, self.narches)))
        }
        let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
        let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?;
        // NOTE(review): see the note on `FatArch::slice` in `MachOIterator::next`.
        let bytes = arch.slice(self.data);
        MachO::parse(bytes, 0)
    }

    /// Parse and return the first binary whose fat arch header satisfies `f`
    ///
    /// `f` receives every header read result, including read errors. Returns `None`
    /// when no header matches.
    pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>(&self, f: F) -> Option<error::Result<MachO<'a>>> {
        self.iter_arches().position(f).map(|i| self.get(i))
    }
    /// Try and find the `cputype` in `Self`, if there is one
    ///
    /// # Errors
    /// Returns the first header read error encountered before a match is found.
    pub fn find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>> {
        for arch in self.iter_arches() {
            let arch = arch?;
            if arch.cputype == cputype { return Ok(Some(arch)) }
        }
        Ok(None)
    }
}

impl<'a> fmt::Debug for MultiArch<'a> {
    /// Formats the arch headers and the container size; a header read failure is
    /// printed in place of the arch list instead of panicking.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let mut out = fmt.debug_struct("MultiArch");
        match self.arches() {
            Ok(arches) => out.field("arches", &arches),
            Err(err) => out.field("arches", &err),
        };
        out.field("data", &self.data.len()).finish()
    }
}

#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
/// Either a collection of multiple architectures, or a single mach-o binary
pub enum Mach<'a> {
    /// A "fat" multi-architecture binary container
    Fat(MultiArch<'a>),
    /// A regular Mach-o binary
    Binary(MachO<'a>)
}

impl<'a> Mach<'a> {
    /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary
    ///
    /// # Errors
    /// `Malformed` if `bytes` is shorter than the 4-byte magic; otherwise whatever
    /// the fat header or single-binary parser reports.
    pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
        let size = bytes.len();
        if size < 4 {
            let error = error::Error::Malformed("size is smaller than a magical number".into());
            return Err(error);
        }
        let magic = peek(&bytes, 0)?;
        match magic {
            fat::FAT_MAGIC => {
                let multi = MultiArch::new(bytes)?;
                Ok(Mach::Fat(multi))
            },
            // we might be a regular binary
            _ => {
                let binary = MachO::parse(bytes, 0)?;
                Ok(Mach::Binary(binary))
            }
        }
    }
}
diff --git a/third_party/rust/goblin/src/mach/relocation.rs b/third_party/rust/goblin/src/mach/relocation.rs new file mode 100644 index 0000000000..940190d524 --- /dev/null +++ b/third_party/rust/goblin/src/mach/relocation.rs @@ -0,0 +1,228 @@
// Format of a relocation entry of a Mach-O file.  Modified from the 4.3BSD
// format.
The modifications from the original format were changing the value +// of the r_symbolnum field for "local" (r_extern == 0) relocation entries. +// This modification is required to support symbols in an arbitrary number of +// sections not just the three sections (text, data and bss) in a 4.3BSD file. +// Also the last 4 bits have had the r_type tag added to them. + +// The r_address is not really the address as its name indicates but an offset. +// In 4.3BSD a.out objects this offset is from the start of the "segment" which +// the relocation entry is for (text or data). For Mach-O object files it is +// also an offset but from the start of the "section" which the relocation +// entry is for. See comments in <mach-o/loader.h> about the r_address field +// in images for use with the dynamic linker. + +// In 4.3BSD a.out objects if r_extern is zero then r_symbolnum is an ordinal +// for the segment the symbol being relocated is in. These ordinals are the +// symbol types N_TEXT, N_DATA, N_BSS or N_ABS. In Mach-O object files these +// ordinals refer to the sections in the object file in the order their section +// structures appear in the headers of the object file they are in. The first +// section has the ordinal 1, the second 2, and so on. This means that the +// same ordinal in two different object files could refer to two different +// sections. And further could have still different ordinals when combined +// by the link-editor. The value R_ABS is used for relocation entries for +// absolute symbols which need no further relocation. 
+use core::fmt; +use crate::mach; +use scroll::{Pread, Pwrite, IOwrite, SizeWith, IOread}; + +// TODO: armv7 relocations are scattered, must and r_address with 0x8000_0000 to check if its scattered or not +#[derive(Copy, Clone, Pread, Pwrite, IOwrite, SizeWith, IOread)] +#[repr(C)] +pub struct RelocationInfo { + /// Offset in the section to what is being relocated + pub r_address: i32, + /// Contains all of the relocation info as a bitfield. + /// r_symbolnum, 24 bits, r_pcrel 1 bit, r_length 2 bits, r_extern 1 bit, r_type 4 bits + pub r_info: u32, +} + +pub const SIZEOF_RELOCATION_INFO: usize = 8; + +impl RelocationInfo { + /// Symbol index if `r_extern` == 1 or section ordinal if `r_extern` == 0. In bits :24 + #[inline] + pub fn r_symbolnum(self) -> usize { + (self.r_info & 0x00ff_ffffu32) as usize + } + /// Was relocated pc relative already, 1 bit + #[inline] + pub fn r_pcrel(self) -> u8 { + ((self.r_info & 0x0100_0000u32) >> 24) as u8 + } + /// The length of the relocation, 0=byte, 1=word, 2=long, 3=quad, 2 bits + #[inline] + pub fn r_length(self) -> u8 { + ((self.r_info & 0x0600_0000u32) >> 25) as u8 + } + /// Does not include value of sym referenced, 1 bit + #[inline] + pub fn r_extern(self) -> u8 { + ((self.r_info & 0x0800_0000) >> 27) as u8 + } + /// Ff not 0, machine specific relocation type, in bits :4 + #[inline] + pub fn r_type(self) -> u8 { + ((self.r_info & 0xf000_0000) >> 28) as u8 + } + /// If true, this relocation is for a symbol; if false, or a section ordinal otherwise + #[inline] + pub fn is_extern(self) -> bool { + self.r_extern() == 1 + } + /// If true, this is a PIC relocation + #[inline] + pub fn is_pic(self) -> bool { + self.r_pcrel() > 0 + } + /// Returns a string representation of this relocation, given the machine `cputype` + pub fn to_str(self, cputype: mach::cputype::CpuType) -> &'static str { + reloc_to_str(self.r_type(), cputype) + } +} + +/// Absolute relocation type for Mach-O files +pub const R_ABS: u8 = 0; + +impl fmt::Debug for 
RelocationInfo { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("RelocationInfo") + .field("r_address", &format_args!("{:#x}", &self.r_address)) + .field("r_info", &format_args!("{:#x}", &self.r_info)) + .field("r_symbolnum", &format_args!("{:#x}", &self.r_symbolnum())) + .field("r_pcrel", &(self.r_pcrel())) + .field("r_length", &self.r_length()) + .field("r_extern", &self.r_extern()) + .field("r_type", &self.r_type()) + .finish() + } +} + +pub type RelocType = u8; + +/// Absolute address +pub const X86_64_RELOC_UNSIGNED: RelocType = 0; +/// Signed 32-bit displacement +pub const X86_64_RELOC_SIGNED: RelocType = 1; +/// A CALL/JMP instruction with 32-bit displacement +pub const X86_64_RELOC_BRANCH: RelocType = 2; +/// A MOVQ load of a GOT entry +pub const X86_64_RELOC_GOT_LOAD: RelocType = 3; +/// Other GOT references +pub const X86_64_RELOC_GOT: RelocType = 4; +/// Must be followed by a X86_64_RELOC_UNSIGNED relocation +pub const X86_64_RELOC_SUBTRACTOR: RelocType = 5; +/// for signed 32-bit displacement with a -1 addend +pub const X86_64_RELOC_SIGNED_1: RelocType = 6; +/// for signed 32-bit displacement with a -2 addend +pub const X86_64_RELOC_SIGNED_2: RelocType = 7; +/// for signed 32-bit displacement with a -4 addend +pub const X86_64_RELOC_SIGNED_4: RelocType = 8; +/// for thread local variables +pub const X86_64_RELOC_TLV: RelocType = 9; + +// x86 relocations +pub const GENERIC_RELOC_VANILLA: RelocType = 0; +pub const GENERIC_RELOC_PAIR: RelocType = 1; +pub const GENERIC_RELOC_SECTDIFF: RelocType = 2; +pub const GENERIC_RELOC_LOCAL_SECTDIFF: RelocType = 3; +pub const GENERIC_RELOC_PB_LA_P: RelocType = 4; + +// arm relocations +pub const ARM_RELOC_VANILLA: RelocType = GENERIC_RELOC_VANILLA; +pub const ARM_RELOC_PAIR: RelocType = GENERIC_RELOC_PAIR; +pub const ARM_RELOC_SECTDIFF: RelocType = GENERIC_RELOC_SECTDIFF; +pub const ARM_RELOC_LOCAL_SECTDIFF: RelocType = 3; +pub const ARM_RELOC_PB_LA_PTR: RelocType = 4; +pub const 
ARM_RELOC_BR24: RelocType = 5; +pub const ARM_THUMB_RELOC_BR22: RelocType = 6; +/// Obsolete +pub const ARM_THUMB_32BIT_BRANCH: RelocType = 7; +pub const ARM_RELOC_HALF: RelocType = 8; +pub const ARM_RELOC_HALF_SECTDIFF: RelocType = 9; + +/// For pointers. +pub const ARM64_RELOC_UNSIGNED: RelocType = 0; +/// Must be followed by an ARM64_RELOC_UNSIGNED +pub const ARM64_RELOC_SUBTRACTOR: RelocType = 1; +/// A B/BL instruction with 26-bit displacement. +pub const ARM64_RELOC_BRANCH26: RelocType = 2; +/// PC-rel distance to page of target. +pub const ARM64_RELOC_PAGE21: RelocType = 3; +/// Offset within page, scaled by r_length. +pub const ARM64_RELOC_PAGEOFF12: RelocType = 4; +/// PC-rel distance to page of GOT slot. +pub const ARM64_RELOC_GOT_LOAD_PAGE21: RelocType = 5; +/// Offset within page of GOT slot, scaled by r_length. +pub const ARM64_RELOC_GOT_LOAD_PAGEOFF12: RelocType = 6; +/// For pointers to GOT slots. +pub const ARM64_RELOC_POINTER_TO_GOT: RelocType = 7; +/// PC-rel distance to page of TLVP slot. +pub const ARM64_RELOC_TLVP_LOAD_PAGE21: RelocType = 8; +/// Offset within page of TLVP slot, scaled by r_length. +pub const ARM64_RELOC_TLVP_LOAD_PAGEOFF12: RelocType = 9; +/// Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12. 
+pub const ARM64_RELOC_ADDEND: RelocType = 10; + +pub fn reloc_to_str(reloc: RelocType, cputype: mach::cputype::CpuType) -> &'static str { + use crate::mach::constants::cputype::*; + match cputype { + CPU_TYPE_ARM64 | CPU_TYPE_ARM64_32 => { + match reloc { + ARM64_RELOC_UNSIGNED => "ARM64_RELOC_UNSIGNED", + ARM64_RELOC_SUBTRACTOR => "ARM64_RELOC_SUBTRACTOR", + ARM64_RELOC_BRANCH26 => "ARM64_RELOC_BRANCH26", + ARM64_RELOC_PAGE21 => "ARM64_RELOC_PAGE21", + ARM64_RELOC_PAGEOFF12 => "ARM64_RELOC_PAGEOFF12", + ARM64_RELOC_GOT_LOAD_PAGE21 => "ARM64_RELOC_GOT_LOAD_PAGE21", + ARM64_RELOC_GOT_LOAD_PAGEOFF12 => "ARM64_RELOC_GOT_LOAD_PAGEOFF12", + ARM64_RELOC_POINTER_TO_GOT => "ARM64_RELOC_POINTER_TO_GOT", + ARM64_RELOC_TLVP_LOAD_PAGE21 => "ARM64_RELOC_TLVP_LOAD_PAGE21", + ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", + ARM64_RELOC_ADDEND => "ARM64_RELOC_ADDEND", + _ => "UNKNOWN", + } + }, + CPU_TYPE_X86_64 => { + match reloc { + X86_64_RELOC_UNSIGNED => "X86_64_RELOC_UNSIGNED", + X86_64_RELOC_SIGNED => "X86_64_RELOC_SIGNED", + X86_64_RELOC_BRANCH => "X86_64_RELOC_BRANCH", + X86_64_RELOC_GOT_LOAD => "X86_64_RELOC_GOT_LOAD", + X86_64_RELOC_GOT => "X86_64_RELOC_GOT", + X86_64_RELOC_SUBTRACTOR => "X86_64_RELOC_SUBTRACTOR", + X86_64_RELOC_SIGNED_1 => "X86_64_RELOC_SIGNED_1", + X86_64_RELOC_SIGNED_2 => "X86_64_RELOC_SIGNED_2", + X86_64_RELOC_SIGNED_4 => "X86_64_RELOC_SIGNED_4", + X86_64_RELOC_TLV => "X86_64_RELOC_TLV", + _ => "UNKNOWN", + } + }, + CPU_TYPE_ARM => { + match reloc { + ARM_RELOC_VANILLA => "ARM_RELOC_VANILLA", + ARM_RELOC_PAIR => "ARM_RELOC_PAIR", + ARM_RELOC_SECTDIFF => "ARM_RELOC_SECTDIFF", + ARM_RELOC_LOCAL_SECTDIFF => "ARM_RELOC_LOCAL_SECTDIFF", + ARM_RELOC_PB_LA_PTR => "ARM_RELOC_PB_LA_PTR", + ARM_RELOC_BR24 => "ARM_RELOC_BR24", + ARM_THUMB_RELOC_BR22 => "ARM_THUMB_RELOC_BR22", + ARM_THUMB_32BIT_BRANCH => "ARM_THUMB_32BIT_BRANCH", + ARM_RELOC_HALF => "ARM_RELOC_HALF", + ARM_RELOC_HALF_SECTDIFF => "ARM_RELOC_HALF_SECTDIFF", + _ => 
"UNKNOWN", + } + }, + CPU_TYPE_X86 => { + match reloc { + GENERIC_RELOC_VANILLA => "GENERIC_RELOC_VANILLA", + GENERIC_RELOC_PAIR => "GENERIC_RELOC_PAIR", + GENERIC_RELOC_SECTDIFF => "GENERIC_RELOC_SECTDIFF", + GENERIC_RELOC_LOCAL_SECTDIFF => "GENERIC_RELOC_LOCAL_SECTDIFF", + GENERIC_RELOC_PB_LA_P => "GENERIC_RELOC_PB_LA_P", + _ => "UNKNOWN", + } + }, + _ => "BAD_CPUTYPE" + } +} diff --git a/third_party/rust/goblin/src/mach/segment.rs b/third_party/rust/goblin/src/mach/segment.rs new file mode 100644 index 0000000000..c070cb6732 --- /dev/null +++ b/third_party/rust/goblin/src/mach/segment.rs @@ -0,0 +1,518 @@ +use scroll::{Pread, Pwrite}; +use scroll::ctx::{self, SizeWith}; + +use log::{debug, warn}; + +use core::fmt; +use core::ops::{Deref, DerefMut}; +use crate::alloc::boxed::Box; +use crate::alloc::vec::Vec; + +use crate::container; +use crate::error; + +use crate::mach::relocation::RelocationInfo; +use crate::mach::load_command::{Section32, Section64, SegmentCommand32, SegmentCommand64, SIZEOF_SECTION_32, SIZEOF_SECTION_64, SIZEOF_SEGMENT_COMMAND_32, SIZEOF_SEGMENT_COMMAND_64, LC_SEGMENT, LC_SEGMENT_64}; +use crate::mach::constants::{SECTION_TYPE, S_ZEROFILL}; + +pub struct RelocationIterator<'a> { + data: &'a [u8], + nrelocs: usize, + offset: usize, + count: usize, + ctx: scroll::Endian, +} + +impl<'a> Iterator for RelocationIterator<'a> { + type Item = error::Result<RelocationInfo>; + fn next(&mut self) -> Option<Self::Item> { + if self.count >= self.nrelocs { + None + } else { + self.count += 1; + match self.data.gread_with(&mut self.offset, self.ctx) { + Ok(res) => Some(Ok(res)), + Err(e) => Some(Err(e.into())) + } + } + } +} + +/// Generalized 32/64 bit Section +#[derive(Default)] +pub struct Section { + /// name of this section + pub sectname: [u8; 16], + /// segment this section goes in + pub segname: [u8; 16], + /// memory address of this section + pub addr: u64, + /// size in bytes of this section + pub size: u64, + /// file offset of this section + pub 
offset: u32, + /// section alignment (power of 2) + pub align: u32, + /// file offset of relocation entries + pub reloff: u32, + /// number of relocation entries + pub nreloc: u32, + /// flags (section type and attributes + pub flags: u32, +} + +impl Section { + /// The name of this section + pub fn name(&self) -> error::Result<&str> { + Ok(self.sectname.pread::<&str>(0)?) + } + /// The containing segment's name + pub fn segname(&self) -> error::Result<&str> { + Ok(self.segname.pread::<&str>(0)?) + } + /// Iterate this sections relocations given `data`; `data` must be the original binary + pub fn iter_relocations<'b>(&self, data: &'b [u8], ctx: container::Ctx) -> RelocationIterator<'b> { + let offset = self.reloff as usize; + debug!("Relocations for {} starting at offset: {:#x}", self.name().unwrap_or("BAD_SECTION_NAME"), offset); + RelocationIterator { + offset, + nrelocs: self.nreloc as usize, + count: 0, + data, + ctx: ctx.le, + } + } +} + +impl From<Section> for Section64 { + fn from(section: Section) -> Self { + Section64 { + sectname: section.sectname, + segname: section.segname, + addr: section.addr as u64, + size: section.size as u64, + offset: section.offset, + align: section.align, + reloff: section.reloff, + nreloc: section.nreloc, + flags: section.flags, + reserved1: 0, + reserved2: 0, + reserved3: 0, + } + } +} + +impl From<Section> for Section32 { + fn from(section: Section) -> Self { + Section32 { + sectname: section.sectname, + segname: section.segname, + addr: section.addr as u32, + size: section.size as u32, + offset: section.offset, + align: section.align, + reloff: section.reloff, + nreloc: section.nreloc, + flags: section.flags, + reserved1: 0, + reserved2: 0, + } + } +} + +impl fmt::Debug for Section { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Section") + .field("sectname", &self.name().unwrap()) + .field("segname", &self.segname().unwrap()) + .field("addr", &self.addr) + .field("size", &self.size) + 
.field("offset", &self.offset) + .field("align", &self.align) + .field("reloff", &self.reloff) + .field("nreloc", &self.nreloc) + .field("flags", &self.flags) + .finish() + } +} + +impl From<Section32> for Section { + fn from(section: Section32) -> Self { + Section { + sectname: section.sectname, + segname: section.segname, + addr: u64::from(section.addr), + size: u64::from(section.size), + offset: section.offset, + align: section.align, + reloff: section.reloff, + nreloc: section.nreloc, + flags: section.flags, + } + } +} + +impl From<Section64> for Section { + fn from(section: Section64) -> Self { + Section { + sectname: section.sectname, + segname: section.segname, + addr: section.addr, + size: section.size, + offset: section.offset, + align: section.align, + reloff: section.reloff, + nreloc: section.nreloc, + flags: section.flags, + } + } +} + +impl<'a> ctx::TryFromCtx<'a, container::Ctx> for Section { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], ctx: container::Ctx) -> Result<(Self, usize), Self::Error> { + match ctx.container { + container::Container::Little => { + let section = Section::from(bytes.pread_with::<Section32>(0, ctx.le)?); + Ok((section, SIZEOF_SECTION_32)) + }, + container::Container::Big => { + let section = Section::from(bytes.pread_with::<Section64>(0, ctx.le)?); + Ok((section, SIZEOF_SECTION_64)) + }, + } + } +} + +impl ctx::SizeWith<container::Ctx> for Section { + fn size_with(ctx: &container::Ctx) -> usize { + match ctx.container { + container::Container::Little => SIZEOF_SECTION_32, + container::Container::Big => SIZEOF_SECTION_64, + } + } +} + +impl ctx::TryIntoCtx<container::Ctx> for Section { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) -> Result<usize, Self::Error> { + if ctx.is_big () { + bytes.pwrite_with::<Section64>(self.into(), 0, ctx.le)?; + } else { + bytes.pwrite_with::<Section32>(self.into(), 0, ctx.le)?; + } + Ok(Self::size_with(&ctx)) + } +} + 
+impl ctx::IntoCtx<container::Ctx> for Section { + fn into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) { + bytes.pwrite_with(self, 0, ctx).unwrap(); + } +} + +pub struct SectionIterator<'a> { + data: &'a [u8], + count: usize, + offset: usize, + idx: usize, + ctx: container::Ctx, +} + +pub type SectionData<'a> = &'a [u8]; + +impl<'a> ::core::iter::ExactSizeIterator for SectionIterator<'a> { + fn len(&self) -> usize { + self.count + } +} + +impl<'a> Iterator for SectionIterator<'a> { + type Item = error::Result<(Section, SectionData<'a>)>; + fn next(&mut self) -> Option<Self::Item> { + if self.idx >= self.count { + None + } else { + self.idx += 1; + match self.data.gread_with::<Section>(&mut self.offset, self.ctx) { + Ok(section) => { + let data = if section.flags & SECTION_TYPE == S_ZEROFILL { + &[] + } else { + // it's not uncommon to encounter macho files where files are + // truncated but the sections are still remaining in the header. + // Because of this we want to not panic here but instead just + // slice down to a empty data slice. This way only if code + // actually needs to access those sections it will fall over. + self.data + .get(section.offset as usize..) 
+ .unwrap_or_else(|| { + warn!("section #{} offset {} out of bounds", self.idx, section.offset); + &[] + }) + .get(..section.size as usize) + .unwrap_or_else(|| { + warn!("section #{} size {} out of bounds", self.idx, section.size); + &[] + }) + }; + Some(Ok((section, data))) + }, + Err(e) => Some(Err(e)) + } + } + } +} + +impl<'a, 'b> IntoIterator for &'b Segment<'a> { + type Item = error::Result<(Section, SectionData<'a>)>; + type IntoIter = SectionIterator<'a>; + fn into_iter(self) -> Self::IntoIter { + SectionIterator { + data: self.raw_data, + count: self.nsects as usize, + offset: self.offset + Segment::size_with(&self.ctx), + idx: 0, + ctx: self.ctx, + } + } +} + +/// Generalized 32/64 bit Segment Command +pub struct Segment<'a> { + pub cmd: u32, + pub cmdsize: u32, + pub segname: [u8; 16], + pub vmaddr: u64, + pub vmsize: u64, + pub fileoff: u64, + pub filesize: u64, + pub maxprot: u32, + pub initprot: u32, + pub nsects: u32, + pub flags: u32, + pub data: &'a [u8], + offset: usize, + raw_data: &'a [u8], + ctx: container::Ctx, +} + +impl<'a> From<Segment<'a>> for SegmentCommand64 { + fn from(segment: Segment<'a>) -> Self { + SegmentCommand64 { + cmd: segment.cmd, + cmdsize: segment.cmdsize, + segname: segment.segname, + vmaddr: segment.vmaddr as u64, + vmsize: segment.vmsize as u64, + fileoff: segment.fileoff as u64, + filesize: segment.filesize as u64, + maxprot: segment.maxprot, + initprot: segment.initprot, + nsects: segment.nsects, + flags: segment.flags, + } + } +} + +impl<'a> From<Segment<'a>> for SegmentCommand32 { + fn from(segment: Segment<'a>) -> Self { + SegmentCommand32 { + cmd: segment.cmd, + cmdsize: segment.cmdsize, + segname: segment.segname, + vmaddr: segment.vmaddr as u32, + vmsize: segment.vmsize as u32, + fileoff: segment.fileoff as u32, + filesize: segment.filesize as u32, + maxprot: segment.maxprot, + initprot: segment.initprot, + nsects: segment.nsects, + flags: segment.flags, + } + } +} + +impl<'a> fmt::Debug for Segment<'a> { + fn 
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Segment") + .field("cmd", &self.cmd) + .field("cmdsize", &self.cmdsize) + .field("segname", &self.segname.pread::<&str>(0).unwrap()) + .field("vmaddr", &self.vmaddr) + .field("vmsize", &self.vmsize) + .field("fileoff", &self.fileoff) + .field("filesize", &self.filesize) + .field("maxprot", &self.maxprot) + .field("initprot", &self.initprot) + .field("nsects", &self.nsects) + .field("flags", &self.flags) + .field("data", &self.data.len()) + .field("sections()", &self.sections().map(|sections| + sections.into_iter().map(|(section,_)| section).collect::<Vec<_>>()) + ) + .finish() + } +} + +impl<'a> ctx::SizeWith<container::Ctx> for Segment<'a> { + fn size_with(ctx: &container::Ctx) -> usize { + match ctx.container { + container::Container::Little => SIZEOF_SEGMENT_COMMAND_32, + container::Container::Big => SIZEOF_SEGMENT_COMMAND_64, + } + } +} + +impl<'a> ctx::TryIntoCtx<container::Ctx> for Segment<'a> { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) -> Result<usize, Self::Error> { + let segment_size = Self::size_with(&ctx); + // should be able to write the section data inline after this, but not working at the moment + //let section_size = bytes.pwrite(data, segment_size)?; + //debug!("Segment size: {} raw section data size: {}", segment_size, data.len()); + if ctx.is_big () { + bytes.pwrite_with::<SegmentCommand64>(self.into(), 0, ctx.le)?; + } else { + bytes.pwrite_with::<SegmentCommand32>(self.into(), 0, ctx.le)?; + } + //debug!("Section size: {}", section_size); + Ok(segment_size) + } +} + +impl<'a> ctx::IntoCtx<container::Ctx> for Segment<'a> { + fn into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) { + bytes.pwrite_with(self, 0, ctx).unwrap(); + } +} + +/// Read data that belongs to a segment if the offset is within the boundaries of bytes. 
+fn segment_data(bytes: &[u8], fileoff :u64, filesize :u64) -> Result<&[u8], error::Error> { + let data :&[u8] = if filesize != 0 { + bytes.pread_with(fileoff as usize, filesize as usize)? + } else { + &[] + }; + Ok(data) +} + +impl<'a> Segment<'a> { + /// Create a new, blank segment, with cmd either `LC_SEGMENT_64`, or `LC_SEGMENT`, depending on `ctx`. + /// **NB** You are responsible for providing a correctly marshalled byte array as the sections. You should not use this for anything other than writing. + pub fn new(ctx: container::Ctx, sections: &'a [u8]) -> Self { + Segment { + cmd: if ctx.is_big() { LC_SEGMENT_64 } else { LC_SEGMENT }, + cmdsize: (Self::size_with(&ctx) + sections.len()) as u32, + segname: [0; 16], + vmaddr: 0, + vmsize: 0, + fileoff: 0, + filesize: 0, + maxprot: 0, + initprot: 0, + nsects: 0, + flags: 0, + data: sections, + offset: 0, + raw_data: &[], + ctx, + } + } + /// Get the name of this segment + pub fn name(&self) -> error::Result<&str> { + Ok(self.segname.pread::<&str>(0)?) 
+ } + /// Get the sections from this segment, erroring if any section couldn't be retrieved + pub fn sections(&self) -> error::Result<Vec<(Section, SectionData<'a>)>> { + let mut sections = Vec::new(); + for section in self.into_iter() { + sections.push(section?); + } + Ok(sections) + } + /// Convert the raw C 32-bit segment command to a generalized version + pub fn from_32(bytes: &'a[u8], segment: &SegmentCommand32, offset: usize, ctx: container::Ctx) -> Result<Self, error::Error> { + Ok(Segment { + cmd: segment.cmd, + cmdsize: segment.cmdsize, + segname: segment.segname, + vmaddr: u64::from(segment.vmaddr), + vmsize: u64::from(segment.vmsize), + fileoff: u64::from(segment.fileoff), + filesize: u64::from(segment.filesize), + maxprot: segment.maxprot, + initprot: segment.initprot, + nsects: segment.nsects, + flags: segment.flags, + data: segment_data(bytes, u64::from(segment.fileoff), u64::from(segment.filesize))?, + offset, + raw_data: bytes, + ctx, + }) + } + /// Convert the raw C 64-bit segment command to a generalized version + pub fn from_64(bytes: &'a [u8], segment: &SegmentCommand64, offset: usize, ctx: container::Ctx) -> Result<Self, error::Error> { + Ok(Segment { + cmd: segment.cmd, + cmdsize: segment.cmdsize, + segname: segment.segname, + vmaddr: segment.vmaddr, + vmsize: segment.vmsize, + fileoff: segment.fileoff, + filesize: segment.filesize, + maxprot: segment.maxprot, + initprot: segment.initprot, + nsects: segment.nsects, + flags: segment.flags, + data: segment_data(bytes, segment.fileoff, segment.filesize)?, + offset, + raw_data: bytes, + ctx, + }) + } +} + +#[derive(Debug, Default)] +/// An opaque 32/64-bit container for Mach-o segments +pub struct Segments<'a> { + segments: Vec<Segment<'a>>, + ctx: container::Ctx, +} + +impl<'a> Deref for Segments<'a> { + type Target = Vec<Segment<'a>>; + fn deref(&self) -> &Self::Target { + &self.segments + } +} + +impl<'a> DerefMut for Segments<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut 
self.segments + } +} + +impl<'a, 'b> IntoIterator for &'b Segments<'a> { + type Item = &'b Segment<'a>; + type IntoIter = ::core::slice::Iter<'b, Segment<'a>>; + fn into_iter(self) -> Self::IntoIter { + self.segments.iter() + } +} + +impl<'a> Segments<'a> { + /// Construct a new generalized segment container from this `ctx` + pub fn new(ctx: container::Ctx) -> Self { + Segments { + segments: Vec::new(), + ctx, + } + } + /// Get every section from every segment + // thanks to SpaceManic for figuring out the 'b lifetimes here :) + pub fn sections<'b>(&'b self) -> Box<dyn Iterator<Item=SectionIterator<'a>> + 'b> { + Box::new(self.segments.iter().map(|segment| segment.into_iter())) + } +} diff --git a/third_party/rust/goblin/src/mach/symbols.rs b/third_party/rust/goblin/src/mach/symbols.rs new file mode 100644 index 0000000000..d5d73f3ac5 --- /dev/null +++ b/third_party/rust/goblin/src/mach/symbols.rs @@ -0,0 +1,469 @@ +//! "Nlist" style symbols in this binary - beware, like most symbol tables in most binary formats, they are strippable, and should not be relied upon, see the imports and exports modules for something more permanent. +//! +//! Symbols are essentially a type, offset, and the symbol name + +use scroll::ctx; +use scroll::ctx::SizeWith; +use scroll::{Pread, Pwrite, SizeWith, IOread, IOwrite}; +use crate::error; +use crate::container::{self, Container}; +use crate::mach::load_command; +use core::fmt::{self, Debug}; + +// The n_type field really contains four fields which are used via the following masks. +/// if any of these bits set, a symbolic debugging entry +pub const N_STAB: u8 = 0xe0; +/// private external symbol bit +pub const N_PEXT: u8 = 0x10; +/// mask for the type bits +pub const N_TYPE: u8 = 0x0e; +/// external symbol bit, set for external symbols +pub const N_EXT: u8 = 0x01; + +// If the type is N_SECT then the n_sect field contains an ordinal of the +// section the symbol is defined in. 
// The sections are numbered from 1 and
// refer to sections in the order they appear in the load commands for the file
// they are in.  This means the same ordinal may very well refer to different
// sections in different files.

// The n_value field for all symbol table entries (including N_STAB's) gets
// updated by the link editor based on the value of its n_sect field and where
// the section n_sect references gets relocated.  If the value of the n_sect
// field is NO_SECT then its n_value field is not changed by the link editor.
/// symbol is not in any section
pub const NO_SECT: u8 = 0;
/// 1 thru 255 inclusive
pub const MAX_SECT: u8 = 255;

// Values of the type bits (`n_type & N_TYPE`).
/// undefined, n_sect == NO_SECT
pub const N_UNDF: u8 = 0x0;
/// absolute, n_sect == NO_SECT
pub const N_ABS:  u8 = 0x2;
/// defined in section number n_sect
pub const N_SECT: u8 = 0xe;
/// prebound undefined (defined in a dylib)
pub const N_PBUD: u8 = 0xc;
/// indirect
pub const N_INDR: u8 = 0xa;

// n_types when N_STAB (symbolic debugging entries)
pub const N_GSYM:    u8 = 0x20;
pub const N_FNAME:   u8 = 0x22;
pub const N_FUN:     u8 = 0x24;
pub const N_STSYM:   u8 = 0x26;
pub const N_LCSYM:   u8 = 0x28;
pub const N_BNSYM:   u8 = 0x2e;
pub const N_PC:      u8 = 0x30;
pub const N_AST:     u8 = 0x32;
pub const N_OPT:     u8 = 0x3c;
pub const N_RSYM:    u8 = 0x40;
pub const N_SLINE:   u8 = 0x44;
pub const N_ENSYM:   u8 = 0x4e;
pub const N_SSYM:    u8 = 0x60;
pub const N_SO:      u8 = 0x64;
pub const N_OSO:     u8 = 0x66;
pub const N_LSYM:    u8 = 0x80;
pub const N_BINCL:   u8 = 0x82;
pub const N_SOL:     u8 = 0x84;
pub const N_PARAMS:  u8 = 0x86;
pub const N_VERSION: u8 = 0x88;
pub const N_OLEVEL:  u8 = 0x8a;
pub const N_PSYM:    u8 = 0xa0;
pub const N_EINCL:   u8 = 0xa2;
pub const N_ENTRY:   u8 = 0xa4;
pub const N_LBRAC:   u8 = 0xc0;
pub const N_EXCL:    u8 = 0xc2;
pub const N_RBRAC:   u8 = 0xe0;
pub const N_BCOMM:   u8 = 0xe2;
pub const N_ECOMM:   u8 = 0xe4;
pub const N_ECOML:   u8 = 0xe8;
pub const N_LENG:    u8 = 0xfe;

pub const NLIST_TYPE_MASK: u8 = 0xe;
pub const NLIST_TYPE_GLOBAL: u8 = 0x1;
pub const NLIST_TYPE_LOCAL: u8 = 0x0;

/// Mask for reference flags of `n_desc` field.
pub const REFERENCE_TYPE: u16 = 0xf;
/// This symbol is a reference to an external non-lazy (data) symbol.
pub const REFERENCE_FLAG_UNDEFINED_NON_LAZY: u16 = 0x0;
/// This symbol is a reference to an external lazy symbol—that is, to a function call.
pub const REFERENCE_FLAG_UNDEFINED_LAZY: u16 = 0x1;
/// This symbol is defined in this module.
pub const REFERENCE_FLAG_DEFINED: u16 = 0x2;
/// This symbol is defined in this module and is visible only to modules within this
/// shared library.
pub const REFERENCE_FLAG_PRIVATE_DEFINED: u16 = 0x3;
/// This symbol is defined in another module in this file, is a non-lazy (data) symbol,
/// and is visible only to modules within this shared library.
pub const REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY: u16 = 0x4;
/// This symbol is defined in another module in this file, is a lazy (function) symbol,
/// and is visible only to modules within this shared library.
pub const REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY: u16 = 0x5;

// Additional flags of n_desc field.

/// Must be set for any defined symbol that is referenced by dynamic-loader APIs
/// (such as dlsym and NSLookupSymbolInImage) and not ordinary undefined symbol
/// references. The `strip` tool uses this bit to avoid removing symbols that must
/// exist: If the symbol has this bit set, `strip` does not strip it.
pub const REFERENCED_DYNAMICALLY: u16 = 0x10;
/// Sometimes used by the dynamic linker at runtime in a fully linked image. Do not
/// set this bit in a fully linked image.
pub const N_DESC_DISCARDED: u16 = 0x20;
/// When set in a relocatable object file (file type MH_OBJECT) on a defined symbol,
/// indicates to the static linker to never dead-strip the symbol.
// (Note that the same bit (0x20) is used for two nonoverlapping purposes.)
+pub const N_NO_DEAD_STRIP: u16 = 0x20; +/// Indicates that this undefined symbol is a weak reference. If the dynamic linker +/// cannot find a definition for this symbol, it sets the address of this symbol to 0. +/// The static linker sets this symbol given the appropriate weak-linking flags. +pub const N_WEAK_REF: u16 = 0x40; +/// Indicates that this symbol is a weak definition. If the static linker or the +/// dynamic linker finds another (non-weak) definition for this symbol, the weak +/// definition is ignored. Only symbols in a coalesced section can be marked as a +/// weak definition. +pub const N_WEAK_DEF: u16 = 0x80; + +pub fn n_type_to_str(n_type: u8) -> &'static str { + match n_type { + N_UNDF => "N_UNDF", + N_ABS => "N_ABS", + N_SECT => "N_SECT", + N_PBUD => "N_PBUD", + N_INDR => "N_INDR", + _ => "UNKNOWN_N_TYPE" + } +} + +#[repr(C)] +#[derive(Clone, Copy, Pread, Pwrite, SizeWith, IOread, IOwrite)] +pub struct Nlist32 { + /// index into the string table + pub n_strx: u32, + /// type flag, see below + pub n_type: u8, + /// section number or NO_SECT + pub n_sect: u8, + /// see <mach-o/stab.h> + pub n_desc: u16, + /// value of this symbol (or stab offset) + pub n_value: u32, +} + +pub const SIZEOF_NLIST_32: usize = 12; + +impl Debug for Nlist32 { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Nlist32") + .field("n_strx", &format_args!("{:04}", self.n_strx)) + .field("n_type", &format_args!("{:#02x}", self.n_type)) + .field("n_sect", &format_args!("{:#x}", self.n_sect)) + .field("n_desc", &format_args!("{:#03x}", self.n_desc)) + .field("n_value", &format_args!("{:#x}", self.n_value)) + .finish() + } +} + +#[repr(C)] +#[derive(Clone, Copy, Pread, Pwrite, SizeWith, IOread, IOwrite)] +pub struct Nlist64 { + /// index into the string table + pub n_strx: u32, + /// type flag, see below + pub n_type: u8, + /// section number or NO_SECT + pub n_sect: u8, + /// see <mach-o/stab.h> + pub n_desc: u16, + /// value of this symbol (or 
stab offset) + pub n_value: u64, +} + +pub const SIZEOF_NLIST_64: usize = 16; + +impl Debug for Nlist64 { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Nlist64") + .field("n_strx", &format_args!("{:04}", self.n_strx)) + .field("n_type", &format_args!("{:#02x}", self.n_type)) + .field("n_sect", &format_args!("{:#x}", self.n_sect)) + .field("n_desc", &format_args!("{:#03x}", self.n_desc)) + .field("n_value", &format_args!("{:#x}", self.n_value)) + .finish() + } +} + +#[derive(Debug, Clone,)] +pub struct Nlist { + /// index into the string table + pub n_strx: usize, + /// type flag, see below + pub n_type: u8, + /// section number or NO_SECT + pub n_sect: usize, + /// see <mach-o/stab.h> + pub n_desc: u16, + /// value of this symbol (or stab offset) + pub n_value: u64, +} + +impl Nlist { + /// Gets this symbol's type in bits 0xe + pub fn get_type(&self) -> u8 { + self.n_type & N_TYPE + } + /// Gets the str representation of the type of this symbol + pub fn type_str(&self) -> &'static str { + n_type_to_str(self.get_type()) + } + /// Whether this symbol is global or not + pub fn is_global(&self) -> bool { + self.n_type & N_EXT != 0 + } + /// Whether this symbol is weak or not + pub fn is_weak(&self) -> bool { + self.n_desc & (N_WEAK_REF | N_WEAK_DEF) != 0 + } + /// Whether this symbol is undefined or not + pub fn is_undefined(&self) -> bool { + self.n_sect == 0 && self.n_type & N_TYPE == N_UNDF + } + /// Whether this symbol is a symbolic debugging entry + pub fn is_stab(&self) -> bool { + self.n_type & N_STAB != 0 + } +} + +impl ctx::SizeWith<container::Ctx> for Nlist { + fn size_with(ctx: &container::Ctx) -> usize { + match ctx.container { + Container::Little => { + SIZEOF_NLIST_32 + }, + Container::Big => { + SIZEOF_NLIST_64 + }, + } + } +} + +impl From<Nlist32> for Nlist { + fn from(nlist: Nlist32) -> Self { + Nlist { + n_strx: nlist.n_strx as usize, + n_type: nlist.n_type, + n_sect: nlist.n_sect as usize, + n_desc: nlist.n_desc, + 
n_value: u64::from(nlist.n_value), + } + } +} + +impl From<Nlist64> for Nlist { + fn from(nlist: Nlist64) -> Self { + Nlist { + n_strx: nlist.n_strx as usize, + n_type: nlist.n_type, + n_sect: nlist.n_sect as usize, + n_desc: nlist.n_desc, + n_value: nlist.n_value, + } + } +} + +impl From<Nlist> for Nlist32 { + fn from(nlist: Nlist) -> Self { + Nlist32 { + n_strx: nlist.n_strx as u32, + n_type: nlist.n_type, + n_sect: nlist.n_sect as u8, + n_desc: nlist.n_desc, + n_value: nlist.n_value as u32, + } + } +} + +impl From<Nlist> for Nlist64 { + fn from(nlist: Nlist) -> Self { + Nlist64 { + n_strx: nlist.n_strx as u32, + n_type: nlist.n_type, + n_sect: nlist.n_sect as u8, + n_desc: nlist.n_desc, + n_value: nlist.n_value, + } + } +} + +impl<'a> ctx::TryFromCtx<'a, container::Ctx> for Nlist { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], container::Ctx { container, le }: container::Ctx) -> crate::error::Result<(Self, usize)> { + let nlist = match container { + Container::Little => { + (bytes.pread_with::<Nlist32>(0, le)?.into(), SIZEOF_NLIST_32) + }, + Container::Big => { + (bytes.pread_with::<Nlist64>(0, le)?.into(), SIZEOF_NLIST_64) + }, + }; + Ok(nlist) + } +} + +impl ctx::TryIntoCtx<container::Ctx> for Nlist { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], container::Ctx { container, le }: container::Ctx) -> Result<usize, Self::Error> { + let size = match container { + Container::Little => { + (bytes.pwrite_with::<Nlist32>(self.into(), 0, le)?) + }, + Container::Big => { + (bytes.pwrite_with::<Nlist64>(self.into(), 0, le)?) 
+ }, + }; + Ok(size) + } +} + +impl ctx::IntoCtx<container::Ctx> for Nlist { + fn into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) { + bytes.pwrite_with(self, 0, ctx).unwrap(); + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct SymbolsCtx { + pub nsyms: usize, + pub strtab: usize, + pub ctx: container::Ctx, +} + +impl<'a, T: ?Sized> ctx::TryFromCtx<'a, SymbolsCtx, T> for Symbols<'a> where T: AsRef<[u8]> { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a T, SymbolsCtx { + nsyms, strtab, ctx + }: SymbolsCtx) -> crate::error::Result<(Self, usize)> { + let data = bytes.as_ref(); + Ok ((Symbols { + data, + start: 0, + nsyms, + strtab, + ctx, + }, data.len())) + } +} + +#[derive(Default)] +pub struct SymbolIterator<'a> { + data: &'a [u8], + nsyms: usize, + offset: usize, + count: usize, + ctx: container::Ctx, + strtab: usize, +} + +impl<'a> Iterator for SymbolIterator<'a> { + type Item = error::Result<(&'a str, Nlist)>; + fn next(&mut self) -> Option<Self::Item> { + if self.count >= self.nsyms { + None + } else { + self.count += 1; + match self.data.gread_with::<Nlist>(&mut self.offset, self.ctx) { + Ok(symbol) => { + match self.data.pread(self.strtab + symbol.n_strx) { + Ok(name) => { + Some(Ok((name, symbol))) + }, + Err(e) => Some(Err(e.into())) + } + }, + Err(e) => Some(Err(e)) + } + } + } +} + +/// A zero-copy "nlist" style symbol table ("stab"), including the string table +pub struct Symbols<'a> { + data: &'a [u8], + start: usize, + nsyms: usize, + // TODO: we can use an actual strtab here and tie it to symbols lifetime + strtab: usize, + ctx: container::Ctx, +} + +impl<'a, 'b> IntoIterator for &'b Symbols<'a> { + type Item = <SymbolIterator<'a> as Iterator>::Item; + type IntoIter = SymbolIterator<'a>; + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> Symbols<'a> { + /// Creates a new symbol table with `count` elements, from the `start` offset, using the string table at `strtab`, with a _default_ ctx. 
+ //// + /// **Beware**, this will provide incorrect results if you construct this on a 32-bit mach binary, using a 64-bit machine; use `parse` instead if you want 32/64 bit support + pub fn new(bytes: &'a [u8], start: usize, count: usize, strtab: usize) -> error::Result<Symbols<'a>> { + let nsyms = count; + Ok (Symbols { + data: bytes, + start, + nsyms, + strtab, + ctx: container::Ctx::default(), + }) + } + pub fn parse(bytes: &'a [u8], symtab: &load_command::SymtabCommand, ctx: container::Ctx) -> error::Result<Symbols<'a>> { + // we need to normalize the strtab offset before we receive the truncated bytes in pread_with + let strtab = symtab.stroff - symtab.symoff; + Ok(bytes.pread_with(symtab.symoff as usize, SymbolsCtx { nsyms: symtab.nsyms as usize, strtab: strtab as usize, ctx })?) + } + + pub fn iter(&self) -> SymbolIterator<'a> { + SymbolIterator { + offset: self.start as usize, + nsyms: self.nsyms as usize, + count: 0, + data: self.data, + ctx: self.ctx, + strtab: self.strtab, + } + } + + /// Parses a single Nlist symbol from the binary, with its accompanying name + pub fn get(&self, index: usize) -> crate::error::Result<(&'a str, Nlist)> { + let sym: Nlist = self.data.pread_with(self.start + (index * Nlist::size_with(&self.ctx)), self.ctx)?; + let name = self.data.pread(self.strtab + sym.n_strx)?; + Ok((name, sym)) + } +} + +impl<'a> Debug for Symbols<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Symbols") + .field("data", &self.data.len()) + .field("start", &format_args!("{:#?}", self.start)) + .field("nsyms", &self.nsyms) + .field("strtab", &format_args!("{:#x}", self.strtab)) + .finish()?; + + writeln!(fmt, "Symbol List {{")?; + for (i, res) in self.iter().enumerate() { + match res { + Ok((name, nlist)) => writeln!( + fmt, + "{: >10x} {} sect: {:#x} type: {:#02x} desc: {:#03x}", + nlist.n_value, name, nlist.n_sect, nlist.n_type, nlist.n_desc + )?, + Err(error) => writeln!(fmt, " Bad symbol, index: {}, sym: {:?}", 
i, error)?, + } + } + writeln!(fmt, "}}") + } +} |