// Copyright 2017 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package loadmacho implements a Mach-O file reader. package loadmacho import ( "bytes" "cmd/internal/bio" "cmd/internal/objabi" "cmd/internal/sys" "cmd/link/internal/loader" "cmd/link/internal/sym" "encoding/binary" "fmt" ) /* Derived from Plan 9 from User Space's src/libmach/elf.h, elf.c http://code.swtch.com/plan9port/src/tip/src/libmach/ Copyright © 2004 Russ Cox. Portions Copyright © 2008-2010 Google Inc. Portions Copyright © 2010 The Go Authors. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ // TODO(crawshaw): de-duplicate these symbols with cmd/link/internal/ld const ( MACHO_X86_64_RELOC_UNSIGNED = 0 MACHO_X86_64_RELOC_SIGNED = 1 MACHO_ARM64_RELOC_ADDEND = 10 ) type ldMachoObj struct { f *bio.Reader base int64 // off in f where Mach-O begins length int64 // length of Mach-O is64 bool name string e binary.ByteOrder cputype uint subcputype uint filetype uint32 flags uint32 cmd []ldMachoCmd ncmd uint } type ldMachoCmd struct { type_ int off uint32 size uint32 seg ldMachoSeg sym ldMachoSymtab dsym ldMachoDysymtab } type ldMachoSeg struct { name string vmaddr uint64 vmsize uint64 fileoff uint32 filesz uint32 maxprot uint32 initprot uint32 nsect uint32 flags uint32 sect []ldMachoSect } type ldMachoSect struct { name string segname string addr uint64 size uint64 off uint32 align uint32 reloff uint32 nreloc uint32 flags uint32 res1 uint32 res2 uint32 sym loader.Sym rel []ldMachoRel } type ldMachoRel struct { addr uint32 symnum uint32 pcrel uint8 length uint8 extrn uint8 type_ uint8 scattered uint8 value uint32 } type ldMachoSymtab struct { symoff uint32 nsym uint32 stroff uint32 strsize uint32 str []byte sym []ldMachoSym } type ldMachoSym struct { name string type_ uint8 sectnum uint8 desc uint16 kind int8 value uint64 sym loader.Sym } type ldMachoDysymtab struct { ilocalsym uint32 nlocalsym uint32 iextdefsym uint32 nextdefsym uint32 iundefsym uint32 nundefsym uint32 tocoff uint32 ntoc uint32 modtaboff uint32 nmodtab uint32 extrefsymoff uint32 nextrefsyms uint32 indirectsymoff uint32 nindirectsyms uint32 extreloff uint32 nextrel uint32 locreloff uint32 nlocrel uint32 indir []uint32 } // ldMachoSym.type_ const ( N_EXT = 0x01 N_TYPE = 0x1e N_STAB = 0xe0 ) // ldMachoSym.desc const ( N_WEAK_REF = 0x40 N_WEAK_DEF = 0x80 ) const ( LdMachoCpuVax = 1 LdMachoCpu68000 = 6 LdMachoCpu386 = 7 LdMachoCpuAmd64 = 1<<24 | 7 LdMachoCpuMips = 8 LdMachoCpu98000 = 10 LdMachoCpuHppa = 11 LdMachoCpuArm = 12 LdMachoCpuArm64 = 1<<24 | 12 LdMachoCpu88000 = 13 LdMachoCpuSparc = 14 LdMachoCpu860 = 15 LdMachoCpuAlpha = 16 LdMachoCpuPower = 18 LdMachoCmdSegment = 1 LdMachoCmdSymtab = 2 LdMachoCmdSymseg = 3 LdMachoCmdThread = 4 LdMachoCmdDysymtab = 11 LdMachoCmdSegment64 = 25 LdMachoFileObject = 1 LdMachoFileExecutable = 2 LdMachoFileFvmlib = 3 LdMachoFileCore = 4 LdMachoFilePreload = 5 ) func unpackcmd(p []byte, m *ldMachoObj, c *ldMachoCmd, type_ uint, sz uint) int { e4 := m.e.Uint32 e8 := m.e.Uint64 c.type_ = int(type_) c.size = uint32(sz) switch type_ { default: return -1 case LdMachoCmdSegment: if sz < 56 { return -1 } c.seg.name = cstring(p[8:24]) c.seg.vmaddr = uint64(e4(p[24:])) c.seg.vmsize = uint64(e4(p[28:])) c.seg.fileoff = e4(p[32:]) c.seg.filesz = e4(p[36:]) c.seg.maxprot = e4(p[40:]) c.seg.initprot = e4(p[44:]) c.seg.nsect = e4(p[48:]) c.seg.flags = e4(p[52:]) c.seg.sect = make([]ldMachoSect, c.seg.nsect) if uint32(sz) < 56+c.seg.nsect*68 { return -1 } p = p[56:] var s *ldMachoSect for i := 0; uint32(i) < c.seg.nsect; i++ { s = &c.seg.sect[i] s.name = cstring(p[0:16]) s.segname = cstring(p[16:32]) s.addr = uint64(e4(p[32:])) s.size = uint64(e4(p[36:])) s.off = e4(p[40:]) s.align = e4(p[44:]) s.reloff = e4(p[48:]) s.nreloc = e4(p[52:]) s.flags = e4(p[56:]) s.res1 = e4(p[60:]) s.res2 = e4(p[64:]) p = p[68:] } case LdMachoCmdSegment64: if sz < 72 { return -1 } c.seg.name = cstring(p[8:24]) c.seg.vmaddr = e8(p[24:]) c.seg.vmsize = e8(p[32:]) c.seg.fileoff = uint32(e8(p[40:])) c.seg.filesz = uint32(e8(p[48:])) c.seg.maxprot = e4(p[56:]) c.seg.initprot = e4(p[60:]) c.seg.nsect = e4(p[64:]) c.seg.flags = e4(p[68:]) c.seg.sect = make([]ldMachoSect, c.seg.nsect) if uint32(sz) < 72+c.seg.nsect*80 { return -1 } p = p[72:] var s *ldMachoSect for i := 0; uint32(i) < c.seg.nsect; i++ { s = &c.seg.sect[i] s.name = cstring(p[0:16]) s.segname = cstring(p[16:32]) s.addr = e8(p[32:]) s.size = e8(p[40:]) s.off = e4(p[48:]) s.align = e4(p[52:]) s.reloff = e4(p[56:]) s.nreloc = e4(p[60:]) s.flags = e4(p[64:]) s.res1 = e4(p[68:]) s.res2 = e4(p[72:]) // p+76 is reserved p = p[80:] } case LdMachoCmdSymtab: if sz < 24 { return -1 } c.sym.symoff = e4(p[8:]) c.sym.nsym = e4(p[12:]) c.sym.stroff = e4(p[16:]) c.sym.strsize = e4(p[20:]) case LdMachoCmdDysymtab: if sz < 80 { return -1 } c.dsym.ilocalsym = e4(p[8:]) c.dsym.nlocalsym = e4(p[12:]) c.dsym.iextdefsym = e4(p[16:]) c.dsym.nextdefsym = e4(p[20:]) c.dsym.iundefsym = e4(p[24:]) c.dsym.nundefsym = e4(p[28:]) c.dsym.tocoff = e4(p[32:]) c.dsym.ntoc = e4(p[36:]) c.dsym.modtaboff = e4(p[40:]) c.dsym.nmodtab = e4(p[44:]) c.dsym.extrefsymoff = e4(p[48:]) c.dsym.nextrefsyms = e4(p[52:]) c.dsym.indirectsymoff = e4(p[56:]) c.dsym.nindirectsyms = e4(p[60:]) c.dsym.extreloff = e4(p[64:]) c.dsym.nextrel = e4(p[68:]) c.dsym.locreloff = e4(p[72:]) c.dsym.nlocrel = e4(p[76:]) } return 0 } func macholoadrel(m *ldMachoObj, sect *ldMachoSect) int { if sect.rel != nil || sect.nreloc == 0 { return 0 } rel := make([]ldMachoRel, sect.nreloc) m.f.MustSeek(m.base+int64(sect.reloff), 0) buf, _, err := m.f.Slice(uint64(sect.nreloc * 8)) if err != nil { return -1 } for i := uint32(0); i < sect.nreloc; i++ { r := &rel[i] p := buf[i*8:] r.addr = m.e.Uint32(p) // TODO(rsc): Wrong interpretation for big-endian bitfields? if r.addr&0x80000000 != 0 { // scatterbrained relocation r.scattered = 1 v := r.addr >> 24 r.addr &= 0xFFFFFF r.type_ = uint8(v & 0xF) v >>= 4 r.length = 1 << (v & 3) v >>= 2 r.pcrel = uint8(v & 1) r.value = m.e.Uint32(p[4:]) } else { v := m.e.Uint32(p[4:]) r.symnum = v & 0xFFFFFF v >>= 24 r.pcrel = uint8(v & 1) v >>= 1 r.length = 1 << (v & 3) v >>= 2 r.extrn = uint8(v & 1) v >>= 1 r.type_ = uint8(v) } } sect.rel = rel return 0 } func macholoaddsym(m *ldMachoObj, d *ldMachoDysymtab) int { n := int(d.nindirectsyms) m.f.MustSeek(m.base+int64(d.indirectsymoff), 0) p, _, err := m.f.Slice(uint64(n * 4)) if err != nil { return -1 } d.indir = make([]uint32, n) for i := 0; i < n; i++ { d.indir[i] = m.e.Uint32(p[4*i:]) } return 0 } func macholoadsym(m *ldMachoObj, symtab *ldMachoSymtab) int { if symtab.sym != nil { return 0 } m.f.MustSeek(m.base+int64(symtab.stroff), 0) strbuf, _, err := m.f.Slice(uint64(symtab.strsize)) if err != nil { return -1 } symsize := 12 if m.is64 { symsize = 16 } n := int(symtab.nsym * uint32(symsize)) m.f.MustSeek(m.base+int64(symtab.symoff), 0) symbuf, _, err := m.f.Slice(uint64(n)) if err != nil { return -1 } sym := make([]ldMachoSym, symtab.nsym) p := symbuf for i := uint32(0); i < symtab.nsym; i++ { s := &sym[i] v := m.e.Uint32(p) if v >= symtab.strsize { return -1 } s.name = cstring(strbuf[v:]) s.type_ = p[4] s.sectnum = p[5] s.desc = m.e.Uint16(p[6:]) if m.is64 { s.value = m.e.Uint64(p[8:]) } else { s.value = uint64(m.e.Uint32(p[8:])) } p = p[symsize:] } symtab.str = strbuf symtab.sym = sym return 0 } // Load the Mach-O file pn from f. // Symbols are written into syms, and a slice of the text symbols is returned. func Load(l *loader.Loader, arch *sys.Arch, localSymVersion int, f *bio.Reader, pkg string, length int64, pn string) (textp []loader.Sym, err error) { errorf := func(str string, args ...interface{}) ([]loader.Sym, error) { return nil, fmt.Errorf("loadmacho: %v: %v", pn, fmt.Sprintf(str, args...)) } base := f.Offset() hdr, _, err := f.Slice(7 * 4) if err != nil { return errorf("reading hdr: %v", err) } var e binary.ByteOrder if binary.BigEndian.Uint32(hdr[:])&^1 == 0xFEEDFACE { e = binary.BigEndian } else if binary.LittleEndian.Uint32(hdr[:])&^1 == 0xFEEDFACE { e = binary.LittleEndian } else { return errorf("bad magic - not mach-o file") } is64 := e.Uint32(hdr[:]) == 0xFEEDFACF ncmd := e.Uint32(hdr[4*4:]) cmdsz := e.Uint32(hdr[5*4:]) if ncmd > 0x10000 || cmdsz >= 0x01000000 { return errorf("implausible mach-o header ncmd=%d cmdsz=%d", ncmd, cmdsz) } if is64 { f.MustSeek(4, 1) // skip reserved word in header } m := &ldMachoObj{ f: f, e: e, cputype: uint(e.Uint32(hdr[1*4:])), subcputype: uint(e.Uint32(hdr[2*4:])), filetype: e.Uint32(hdr[3*4:]), ncmd: uint(ncmd), flags: e.Uint32(hdr[6*4:]), is64: is64, base: base, length: length, name: pn, } switch arch.Family { default: return errorf("mach-o %s unimplemented", arch.Name) case sys.AMD64: if e != binary.LittleEndian || m.cputype != LdMachoCpuAmd64 { return errorf("mach-o object but not amd64") } case sys.ARM64: if e != binary.LittleEndian || m.cputype != LdMachoCpuArm64 { return errorf("mach-o object but not arm64") } } m.cmd = make([]ldMachoCmd, ncmd) cmdp, _, err := f.Slice(uint64(cmdsz)) if err != nil { return errorf("reading cmds: %v", err) } // read and parse load commands var c *ldMachoCmd var symtab *ldMachoSymtab var dsymtab *ldMachoDysymtab off := uint32(len(hdr)) for i := uint32(0); i < ncmd; i++ { ty := e.Uint32(cmdp) sz := e.Uint32(cmdp[4:]) m.cmd[i].off = off unpackcmd(cmdp, m, &m.cmd[i], uint(ty), uint(sz)) cmdp = cmdp[sz:] off += sz if ty == LdMachoCmdSymtab { if symtab != nil { return errorf("multiple symbol tables") } symtab = &m.cmd[i].sym macholoadsym(m, symtab) } if ty == LdMachoCmdDysymtab { dsymtab = &m.cmd[i].dsym macholoaddsym(m, dsymtab) } if (is64 && ty == LdMachoCmdSegment64) || (!is64 && ty == LdMachoCmdSegment) { if c != nil { return errorf("multiple load commands") } c = &m.cmd[i] } } // load text and data segments into memory. // they are not as small as the load commands, but we'll need // the memory anyway for the symbol images, so we might // as well use one large chunk. if c == nil { return errorf("no load command") } if symtab == nil { // our work is done here - no symbols means nothing can refer to this file return } if int64(c.seg.fileoff+c.seg.filesz) >= length { return errorf("load segment out of range") } f.MustSeek(m.base+int64(c.seg.fileoff), 0) dat, readOnly, err := f.Slice(uint64(c.seg.filesz)) if err != nil { return errorf("cannot load object data: %v", err) } for i := uint32(0); i < c.seg.nsect; i++ { sect := &c.seg.sect[i] if sect.segname != "__TEXT" && sect.segname != "__DATA" { continue } if sect.name == "__eh_frame" { continue } name := fmt.Sprintf("%s(%s/%s)", pkg, sect.segname, sect.name) s := l.LookupOrCreateSym(name, localSymVersion) bld := l.MakeSymbolUpdater(s) if bld.Type() != 0 { return errorf("duplicate %s/%s", sect.segname, sect.name) } if sect.flags&0xff == 1 { // S_ZEROFILL bld.SetData(make([]byte, sect.size)) } else { bld.SetReadOnly(readOnly) bld.SetData(dat[sect.addr-c.seg.vmaddr:][:sect.size]) } bld.SetSize(int64(len(bld.Data()))) if sect.segname == "__TEXT" { if sect.name == "__text" { bld.SetType(sym.STEXT) } else { bld.SetType(sym.SRODATA) } } else { if sect.name == "__bss" { bld.SetType(sym.SNOPTRBSS) bld.SetData(nil) } else { bld.SetType(sym.SNOPTRDATA) } } sect.sym = s } // enter sub-symbols into symbol table. // have to guess sizes from next symbol. for i := uint32(0); i < symtab.nsym; i++ { machsym := &symtab.sym[i] if machsym.type_&N_STAB != 0 { continue } // TODO: check sym->type against outer->type. name := machsym.name if name[0] == '_' && name[1] != '\x00' { name = name[1:] } v := 0 if machsym.type_&N_EXT == 0 { v = localSymVersion } s := l.LookupOrCreateSym(name, v) if machsym.type_&N_EXT == 0 { l.SetAttrDuplicateOK(s, true) } if machsym.desc&(N_WEAK_REF|N_WEAK_DEF) != 0 { l.SetAttrDuplicateOK(s, true) } machsym.sym = s if machsym.sectnum == 0 { // undefined continue } if uint32(machsym.sectnum) > c.seg.nsect { return errorf("reference to invalid section %d", machsym.sectnum) } sect := &c.seg.sect[machsym.sectnum-1] bld := l.MakeSymbolUpdater(s) outer := sect.sym if outer == 0 { continue // ignore reference to invalid section } if osym := l.OuterSym(s); osym != 0 { if l.AttrDuplicateOK(s) { continue } return errorf("duplicate symbol reference: %s in both %s and %s", l.SymName(s), l.SymName(osym), l.SymName(sect.sym)) } bld.SetType(l.SymType(outer)) if l.SymSize(outer) != 0 { // skip empty section (0-sized symbol) l.AddInteriorSym(outer, s) } bld.SetValue(int64(machsym.value - sect.addr)) if !l.AttrCgoExportDynamic(s) { bld.SetDynimplib("") // satisfy dynimport } if l.SymType(outer) == sym.STEXT { if bld.External() && !bld.DuplicateOK() { return errorf("%v: duplicate symbol definition", s) } bld.SetExternal(true) } } // Sort outer lists by address, adding to textp. // This keeps textp in increasing address order. for i := 0; uint32(i) < c.seg.nsect; i++ { sect := &c.seg.sect[i] s := sect.sym if s == 0 { continue } bld := l.MakeSymbolUpdater(s) if bld.SubSym() != 0 { bld.SortSub() // assign sizes, now that we know symbols in sorted order. for s1 := bld.Sub(); s1 != 0; s1 = l.SubSym(s1) { s1Bld := l.MakeSymbolUpdater(s1) if sub := l.SubSym(s1); sub != 0 { s1Bld.SetSize(l.SymValue(sub) - l.SymValue(s1)) } else { dlen := int64(len(l.Data(s))) s1Bld.SetSize(l.SymValue(s) + dlen - l.SymValue(s1)) } } } if bld.Type() == sym.STEXT { if bld.OnList() { return errorf("symbol %s listed multiple times", bld.Name()) } bld.SetOnList(true) textp = append(textp, s) for s1 := bld.Sub(); s1 != 0; s1 = l.SubSym(s1) { if l.AttrOnList(s1) { return errorf("symbol %s listed multiple times", l.RawSymName(s1)) } l.SetAttrOnList(s1, true) textp = append(textp, s1) } } } // load relocations for i := 0; uint32(i) < c.seg.nsect; i++ { sect := &c.seg.sect[i] s := sect.sym if s == 0 { continue } macholoadrel(m, sect) if sect.rel == nil { continue } sb := l.MakeSymbolUpdater(sect.sym) var rAdd int64 for j := uint32(0); j < sect.nreloc; j++ { var ( rOff int32 rSize uint8 rType objabi.RelocType rSym loader.Sym ) rel := §.rel[j] if rel.scattered != 0 { // mach-o only uses scattered relocation on 32-bit platforms, // which are no longer supported. return errorf("%v: unexpected scattered relocation", s) } if arch.Family == sys.ARM64 && rel.type_ == MACHO_ARM64_RELOC_ADDEND { // Two relocations. This addend will be applied to the next one. rAdd = int64(rel.symnum) << 40 >> 40 // convert unsigned 24-bit to signed 24-bit continue } rSize = rel.length rType = objabi.MachoRelocOffset + (objabi.RelocType(rel.type_) << 1) + objabi.RelocType(rel.pcrel) rOff = int32(rel.addr) // Handle X86_64_RELOC_SIGNED referencing a section (rel.extrn == 0). p := l.Data(s) if arch.Family == sys.AMD64 { if rel.extrn == 0 && rel.type_ == MACHO_X86_64_RELOC_SIGNED { // Calculate the addend as the offset into the section. // // The rip-relative offset stored in the object file is encoded // as follows: // // movsd 0x00000360(%rip),%xmm0 // // To get the absolute address of the value this rip-relative address is pointing // to, we must add the address of the next instruction to it. This is done by // taking the address of the relocation and adding 4 to it (since the rip-relative // offset can at most be 32 bits long). To calculate the offset into the section the // relocation is referencing, we subtract the vaddr of the start of the referenced // section found in the original object file. // // [For future reference, see Darwin's /usr/include/mach-o/x86_64/reloc.h] secaddr := c.seg.sect[rel.symnum-1].addr rAdd = int64(uint64(int64(int32(e.Uint32(p[rOff:])))+int64(rOff)+4) - secaddr) } else { rAdd = int64(int32(e.Uint32(p[rOff:]))) } } // An unsigned internal relocation has a value offset // by the section address. if arch.Family == sys.AMD64 && rel.extrn == 0 && rel.type_ == MACHO_X86_64_RELOC_UNSIGNED { secaddr := c.seg.sect[rel.symnum-1].addr rAdd -= int64(secaddr) } if rel.extrn == 0 { if rel.symnum < 1 || rel.symnum > c.seg.nsect { return errorf("invalid relocation: section reference out of range %d vs %d", rel.symnum, c.seg.nsect) } rSym = c.seg.sect[rel.symnum-1].sym if rSym == 0 { return errorf("invalid relocation: %s", c.seg.sect[rel.symnum-1].name) } } else { if rel.symnum >= symtab.nsym { return errorf("invalid relocation: symbol reference out of range") } rSym = symtab.sym[rel.symnum].sym } r, _ := sb.AddRel(rType) r.SetOff(rOff) r.SetSiz(rSize) r.SetSym(rSym) r.SetAdd(rAdd) rAdd = 0 // clear rAdd for next iteration } sb.SortRelocs() } return textp, nil } func cstring(x []byte) string { i := bytes.IndexByte(x, '\x00') if i >= 0 { x = x[:i] } return string(x) }