diff options
Diffstat (limited to 'src/debug/pe/file.go')
-rw-r--r-- | src/debug/pe/file.go | 614 |
1 files changed, 614 insertions, 0 deletions
diff --git a/src/debug/pe/file.go b/src/debug/pe/file.go new file mode 100644 index 0000000..aa0955a --- /dev/null +++ b/src/debug/pe/file.go @@ -0,0 +1,614 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package pe implements access to PE (Microsoft Windows Portable Executable) files. +package pe + +import ( + "bytes" + "compress/zlib" + "debug/dwarf" + "encoding/binary" + "fmt" + "io" + "os" + "strings" +) + +// Avoid use of post-Go 1.4 io features, to make safe for toolchain bootstrap. +const seekStart = 0 + +// A File represents an open PE file. +type File struct { + FileHeader + OptionalHeader any // of type *OptionalHeader32 or *OptionalHeader64 + Sections []*Section + Symbols []*Symbol // COFF symbols with auxiliary symbol records removed + COFFSymbols []COFFSymbol // all COFF symbols (including auxiliary symbol records) + StringTable StringTable + + closer io.Closer +} + +// Open opens the named file using os.Open and prepares it for use as a PE binary. +func Open(name string) (*File, error) { + f, err := os.Open(name) + if err != nil { + return nil, err + } + ff, err := NewFile(f) + if err != nil { + f.Close() + return nil, err + } + ff.closer = f + return ff, nil +} + +// Close closes the File. +// If the File was created using NewFile directly instead of Open, +// Close has no effect. +func (f *File) Close() error { + var err error + if f.closer != nil { + err = f.closer.Close() + f.closer = nil + } + return err +} + +// TODO(brainman): add Load function, as a replacement for NewFile, that does not call removeAuxSymbols (for performance) + +// NewFile creates a new File for accessing a PE binary in an underlying reader. +func NewFile(r io.ReaderAt) (*File, error) { + f := new(File) + sr := io.NewSectionReader(r, 0, 1<<63-1) + + var dosheader [96]byte + if _, err := r.ReadAt(dosheader[0:], 0); err != nil { + return nil, err + } + var base int64 + if dosheader[0] == 'M' && dosheader[1] == 'Z' { + signoff := int64(binary.LittleEndian.Uint32(dosheader[0x3c:])) + var sign [4]byte + r.ReadAt(sign[:], signoff) + if !(sign[0] == 'P' && sign[1] == 'E' && sign[2] == 0 && sign[3] == 0) { + return nil, fmt.Errorf("invalid PE file signature: % x", sign) + } + base = signoff + 4 + } else { + base = int64(0) + } + sr.Seek(base, seekStart) + if err := binary.Read(sr, binary.LittleEndian, &f.FileHeader); err != nil { + return nil, err + } + switch f.FileHeader.Machine { + case IMAGE_FILE_MACHINE_AMD64, + IMAGE_FILE_MACHINE_ARM64, + IMAGE_FILE_MACHINE_ARMNT, + IMAGE_FILE_MACHINE_I386, + IMAGE_FILE_MACHINE_UNKNOWN: + // ok + default: + return nil, fmt.Errorf("unrecognized PE machine: %#x", f.FileHeader.Machine) + } + + var err error + + // Read string table. + f.StringTable, err = readStringTable(&f.FileHeader, sr) + if err != nil { + return nil, err + } + + // Read symbol table. + f.COFFSymbols, err = readCOFFSymbols(&f.FileHeader, sr) + if err != nil { + return nil, err + } + f.Symbols, err = removeAuxSymbols(f.COFFSymbols, f.StringTable) + if err != nil { + return nil, err + } + + // Seek past file header. + _, err = sr.Seek(base+int64(binary.Size(f.FileHeader)), seekStart) + if err != nil { + return nil, err + } + + // Read optional header. + f.OptionalHeader, err = readOptionalHeader(sr, f.FileHeader.SizeOfOptionalHeader) + if err != nil { + return nil, err + } + + // Process sections. + f.Sections = make([]*Section, f.FileHeader.NumberOfSections) + for i := 0; i < int(f.FileHeader.NumberOfSections); i++ { + sh := new(SectionHeader32) + if err := binary.Read(sr, binary.LittleEndian, sh); err != nil { + return nil, err + } + name, err := sh.fullName(f.StringTable) + if err != nil { + return nil, err + } + s := new(Section) + s.SectionHeader = SectionHeader{ + Name: name, + VirtualSize: sh.VirtualSize, + VirtualAddress: sh.VirtualAddress, + Size: sh.SizeOfRawData, + Offset: sh.PointerToRawData, + PointerToRelocations: sh.PointerToRelocations, + PointerToLineNumbers: sh.PointerToLineNumbers, + NumberOfRelocations: sh.NumberOfRelocations, + NumberOfLineNumbers: sh.NumberOfLineNumbers, + Characteristics: sh.Characteristics, + } + r2 := r + if sh.PointerToRawData == 0 { // .bss must have all 0s + r2 = zeroReaderAt{} + } + s.sr = io.NewSectionReader(r2, int64(s.SectionHeader.Offset), int64(s.SectionHeader.Size)) + s.ReaderAt = s.sr + f.Sections[i] = s + } + for i := range f.Sections { + var err error + f.Sections[i].Relocs, err = readRelocs(&f.Sections[i].SectionHeader, sr) + if err != nil { + return nil, err + } + } + + return f, nil +} + +// zeroReaderAt is ReaderAt that reads 0s. +type zeroReaderAt struct{} + +// ReadAt writes len(p) 0s into p. +func (w zeroReaderAt) ReadAt(p []byte, off int64) (n int, err error) { + for i := range p { + p[i] = 0 + } + return len(p), nil +} + +// getString extracts a string from symbol string table. +func getString(section []byte, start int) (string, bool) { + if start < 0 || start >= len(section) { + return "", false + } + + for end := start; end < len(section); end++ { + if section[end] == 0 { + return string(section[start:end]), true + } + } + return "", false +} + +// Section returns the first section with the given name, or nil if no such +// section exists. +func (f *File) Section(name string) *Section { + for _, s := range f.Sections { + if s.Name == name { + return s + } + } + return nil +} + +func (f *File) DWARF() (*dwarf.Data, error) { + dwarfSuffix := func(s *Section) string { + switch { + case strings.HasPrefix(s.Name, ".debug_"): + return s.Name[7:] + case strings.HasPrefix(s.Name, ".zdebug_"): + return s.Name[8:] + default: + return "" + } + + } + + // sectionData gets the data for s and checks its size. + sectionData := func(s *Section) ([]byte, error) { + b, err := s.Data() + if err != nil && uint32(len(b)) < s.Size { + return nil, err + } + + if 0 < s.VirtualSize && s.VirtualSize < s.Size { + b = b[:s.VirtualSize] + } + + if len(b) >= 12 && string(b[:4]) == "ZLIB" { + dlen := binary.BigEndian.Uint64(b[4:12]) + dbuf := make([]byte, dlen) + r, err := zlib.NewReader(bytes.NewBuffer(b[12:])) + if err != nil { + return nil, err + } + if _, err := io.ReadFull(r, dbuf); err != nil { + return nil, err + } + if err := r.Close(); err != nil { + return nil, err + } + b = dbuf + } + return b, nil + } + + // There are many other DWARF sections, but these + // are the ones the debug/dwarf package uses. + // Don't bother loading others. + var dat = map[string][]byte{"abbrev": nil, "info": nil, "str": nil, "line": nil, "ranges": nil} + for _, s := range f.Sections { + suffix := dwarfSuffix(s) + if suffix == "" { + continue + } + if _, ok := dat[suffix]; !ok { + continue + } + + b, err := sectionData(s) + if err != nil { + return nil, err + } + dat[suffix] = b + } + + d, err := dwarf.New(dat["abbrev"], nil, nil, dat["info"], dat["line"], nil, dat["ranges"], dat["str"]) + if err != nil { + return nil, err + } + + // Look for DWARF4 .debug_types sections and DWARF5 sections. + for i, s := range f.Sections { + suffix := dwarfSuffix(s) + if suffix == "" { + continue + } + if _, ok := dat[suffix]; ok { + // Already handled. + continue + } + + b, err := sectionData(s) + if err != nil { + return nil, err + } + + if suffix == "types" { + err = d.AddTypes(fmt.Sprintf("types-%d", i), b) + } else { + err = d.AddSection(".debug_"+suffix, b) + } + if err != nil { + return nil, err + } + } + + return d, nil +} + +// TODO(brainman): document ImportDirectory once we decide what to do with it. + +type ImportDirectory struct { + OriginalFirstThunk uint32 + TimeDateStamp uint32 + ForwarderChain uint32 + Name uint32 + FirstThunk uint32 + + dll string +} + +// ImportedSymbols returns the names of all symbols +// referred to by the binary f that are expected to be +// satisfied by other libraries at dynamic load time. +// It does not return weak symbols. +func (f *File) ImportedSymbols() ([]string, error) { + if f.OptionalHeader == nil { + return nil, nil + } + + pe64 := f.Machine == IMAGE_FILE_MACHINE_AMD64 || f.Machine == IMAGE_FILE_MACHINE_ARM64 + + // grab the number of data directory entries + var dd_length uint32 + if pe64 { + dd_length = f.OptionalHeader.(*OptionalHeader64).NumberOfRvaAndSizes + } else { + dd_length = f.OptionalHeader.(*OptionalHeader32).NumberOfRvaAndSizes + } + + // check that the length of data directory entries is large + // enough to include the imports directory. + if dd_length < IMAGE_DIRECTORY_ENTRY_IMPORT+1 { + return nil, nil + } + + // grab the import data directory entry + var idd DataDirectory + if pe64 { + idd = f.OptionalHeader.(*OptionalHeader64).DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT] + } else { + idd = f.OptionalHeader.(*OptionalHeader32).DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT] + } + + // figure out which section contains the import directory table + var ds *Section + ds = nil + for _, s := range f.Sections { + if s.VirtualAddress <= idd.VirtualAddress && idd.VirtualAddress < s.VirtualAddress+s.VirtualSize { + ds = s + break + } + } + + // didn't find a section, so no import libraries were found + if ds == nil { + return nil, nil + } + + d, err := ds.Data() + if err != nil { + return nil, err + } + + // seek to the virtual address specified in the import data directory + d = d[idd.VirtualAddress-ds.VirtualAddress:] + + // start decoding the import directory + var ida []ImportDirectory + for len(d) >= 20 { + var dt ImportDirectory + dt.OriginalFirstThunk = binary.LittleEndian.Uint32(d[0:4]) + dt.TimeDateStamp = binary.LittleEndian.Uint32(d[4:8]) + dt.ForwarderChain = binary.LittleEndian.Uint32(d[8:12]) + dt.Name = binary.LittleEndian.Uint32(d[12:16]) + dt.FirstThunk = binary.LittleEndian.Uint32(d[16:20]) + d = d[20:] + if dt.OriginalFirstThunk == 0 { + break + } + ida = append(ida, dt) + } + // TODO(brainman): this needs to be rewritten + // ds.Data() returns contents of section containing import table. Why store in variable called "names"? + // Why we are retrieving it second time? We already have it in "d", and it is not modified anywhere. + // getString does not extracts a string from symbol string table (as getString doco says). + // Why ds.Data() called again and again in the loop? + // Needs test before rewrite. + names, _ := ds.Data() + var all []string + for _, dt := range ida { + dt.dll, _ = getString(names, int(dt.Name-ds.VirtualAddress)) + d, _ = ds.Data() + // seek to OriginalFirstThunk + d = d[dt.OriginalFirstThunk-ds.VirtualAddress:] + for len(d) > 0 { + if pe64 { // 64bit + va := binary.LittleEndian.Uint64(d[0:8]) + d = d[8:] + if va == 0 { + break + } + if va&0x8000000000000000 > 0 { // is Ordinal + // TODO add dynimport ordinal support. + } else { + fn, _ := getString(names, int(uint32(va)-ds.VirtualAddress+2)) + all = append(all, fn+":"+dt.dll) + } + } else { // 32bit + va := binary.LittleEndian.Uint32(d[0:4]) + d = d[4:] + if va == 0 { + break + } + if va&0x80000000 > 0 { // is Ordinal + // TODO add dynimport ordinal support. + //ord := va&0x0000FFFF + } else { + fn, _ := getString(names, int(va-ds.VirtualAddress+2)) + all = append(all, fn+":"+dt.dll) + } + } + } + } + + return all, nil +} + +// ImportedLibraries returns the names of all libraries +// referred to by the binary f that are expected to be +// linked with the binary at dynamic link time. +func (f *File) ImportedLibraries() ([]string, error) { + // TODO + // cgo -dynimport don't use this for windows PE, so just return. + return nil, nil +} + +// FormatError is unused. +// The type is retained for compatibility. +type FormatError struct { +} + +func (e *FormatError) Error() string { + return "unknown error" +} + +// readOptionalHeader accepts a io.ReadSeeker pointing to optional header in the PE file +// and its size as seen in the file header. +// It parses the given size of bytes and returns optional header. It infers whether the +// bytes being parsed refer to 32 bit or 64 bit version of optional header. +func readOptionalHeader(r io.ReadSeeker, sz uint16) (any, error) { + // If optional header size is 0, return empty optional header. + if sz == 0 { + return nil, nil + } + + var ( + // First couple of bytes in option header state its type. + // We need to read them first to determine the type and + // validity of optional header. + ohMagic uint16 + ohMagicSz = binary.Size(ohMagic) + ) + + // If optional header size is greater than 0 but less than its magic size, return error. + if sz < uint16(ohMagicSz) { + return nil, fmt.Errorf("optional header size is less than optional header magic size") + } + + // read reads from io.ReadSeeke, r, into data. + var err error + read := func(data any) bool { + err = binary.Read(r, binary.LittleEndian, data) + return err == nil + } + + if !read(&ohMagic) { + return nil, fmt.Errorf("failure to read optional header magic: %v", err) + + } + + switch ohMagic { + case 0x10b: // PE32 + var ( + oh32 OptionalHeader32 + // There can be 0 or more data directories. So the minimum size of optional + // header is calculated by subtracting oh32.DataDirectory size from oh32 size. + oh32MinSz = binary.Size(oh32) - binary.Size(oh32.DataDirectory) + ) + + if sz < uint16(oh32MinSz) { + return nil, fmt.Errorf("optional header size(%d) is less minimum size (%d) of PE32 optional header", sz, oh32MinSz) + } + + // Init oh32 fields + oh32.Magic = ohMagic + if !read(&oh32.MajorLinkerVersion) || + !read(&oh32.MinorLinkerVersion) || + !read(&oh32.SizeOfCode) || + !read(&oh32.SizeOfInitializedData) || + !read(&oh32.SizeOfUninitializedData) || + !read(&oh32.AddressOfEntryPoint) || + !read(&oh32.BaseOfCode) || + !read(&oh32.BaseOfData) || + !read(&oh32.ImageBase) || + !read(&oh32.SectionAlignment) || + !read(&oh32.FileAlignment) || + !read(&oh32.MajorOperatingSystemVersion) || + !read(&oh32.MinorOperatingSystemVersion) || + !read(&oh32.MajorImageVersion) || + !read(&oh32.MinorImageVersion) || + !read(&oh32.MajorSubsystemVersion) || + !read(&oh32.MinorSubsystemVersion) || + !read(&oh32.Win32VersionValue) || + !read(&oh32.SizeOfImage) || + !read(&oh32.SizeOfHeaders) || + !read(&oh32.CheckSum) || + !read(&oh32.Subsystem) || + !read(&oh32.DllCharacteristics) || + !read(&oh32.SizeOfStackReserve) || + !read(&oh32.SizeOfStackCommit) || + !read(&oh32.SizeOfHeapReserve) || + !read(&oh32.SizeOfHeapCommit) || + !read(&oh32.LoaderFlags) || + !read(&oh32.NumberOfRvaAndSizes) { + return nil, fmt.Errorf("failure to read PE32 optional header: %v", err) + } + + dd, err := readDataDirectories(r, sz-uint16(oh32MinSz), oh32.NumberOfRvaAndSizes) + if err != nil { + return nil, err + } + + copy(oh32.DataDirectory[:], dd) + + return &oh32, nil + case 0x20b: // PE32+ + var ( + oh64 OptionalHeader64 + // There can be 0 or more data directories. So the minimum size of optional + // header is calculated by subtracting oh64.DataDirectory size from oh64 size. + oh64MinSz = binary.Size(oh64) - binary.Size(oh64.DataDirectory) + ) + + if sz < uint16(oh64MinSz) { + return nil, fmt.Errorf("optional header size(%d) is less minimum size (%d) for PE32+ optional header", sz, oh64MinSz) + } + + // Init oh64 fields + oh64.Magic = ohMagic + if !read(&oh64.MajorLinkerVersion) || + !read(&oh64.MinorLinkerVersion) || + !read(&oh64.SizeOfCode) || + !read(&oh64.SizeOfInitializedData) || + !read(&oh64.SizeOfUninitializedData) || + !read(&oh64.AddressOfEntryPoint) || + !read(&oh64.BaseOfCode) || + !read(&oh64.ImageBase) || + !read(&oh64.SectionAlignment) || + !read(&oh64.FileAlignment) || + !read(&oh64.MajorOperatingSystemVersion) || + !read(&oh64.MinorOperatingSystemVersion) || + !read(&oh64.MajorImageVersion) || + !read(&oh64.MinorImageVersion) || + !read(&oh64.MajorSubsystemVersion) || + !read(&oh64.MinorSubsystemVersion) || + !read(&oh64.Win32VersionValue) || + !read(&oh64.SizeOfImage) || + !read(&oh64.SizeOfHeaders) || + !read(&oh64.CheckSum) || + !read(&oh64.Subsystem) || + !read(&oh64.DllCharacteristics) || + !read(&oh64.SizeOfStackReserve) || + !read(&oh64.SizeOfStackCommit) || + !read(&oh64.SizeOfHeapReserve) || + !read(&oh64.SizeOfHeapCommit) || + !read(&oh64.LoaderFlags) || + !read(&oh64.NumberOfRvaAndSizes) { + return nil, fmt.Errorf("failure to read PE32+ optional header: %v", err) + } + + dd, err := readDataDirectories(r, sz-uint16(oh64MinSz), oh64.NumberOfRvaAndSizes) + if err != nil { + return nil, err + } + + copy(oh64.DataDirectory[:], dd) + + return &oh64, nil + default: + return nil, fmt.Errorf("optional header has unexpected Magic of 0x%x", ohMagic) + } +} + +// readDataDirectories accepts a io.ReadSeeker pointing to data directories in the PE file, +// its size and number of data directories as seen in optional header. +// It parses the given size of bytes and returns given number of data directories. +func readDataDirectories(r io.ReadSeeker, sz uint16, n uint32) ([]DataDirectory, error) { + ddSz := binary.Size(DataDirectory{}) + if uint32(sz) != n*uint32(ddSz) { + return nil, fmt.Errorf("size of data directories(%d) is inconsistent with number of data directories(%d)", sz, n) + } + + dd := make([]DataDirectory, n) + if err := binary.Read(r, binary.LittleEndian, dd); err != nil { + return nil, fmt.Errorf("failure to read data directories: %v", err) + } + + return dd, nil +} |