290 lines
12 KiB
Python
Executable file
290 lines
12 KiB
Python
Executable file
#!/usr/bin/env python
|
|
# Copyright 2018 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
"""Wraps ml.exe or ml64.exe and postprocesses the output to be deterministic.
|
|
Sets timestamp in .obj file to 0, hence incompatible with link.exe /incremental.
Also strips the .debug$S section, which contains an absolute path.
|
|
|
|
Use by prefixing the ml(64).exe invocation with this script:
|
|
python ml.py ml.exe [args...]"""
|
|
|
|
import array
|
|
import collections
|
|
import struct
|
|
import subprocess
|
|
import sys
|
|
|
|
|
|
class Struct(object):
    """Little-endian struct layout whose unpacked records are namedtuples."""

    def __init__(self, name, *args):
        """Build the layout from alternating format codes and field names.

        `name` becomes the name of the namedtuple type. `args` alternates
        struct-module format strings (even positions) with the field names
        they map to (odd positions).
        """
        format_codes = args[0::2]
        field_names = args[1::2]
        # '<' selects little-endian byte order with standard sizes and no
        # alignment padding.
        self.fmt = '<' + ''.join(format_codes)
        self.type = collections.namedtuple(name, field_names)

    def pack_into(self, buffer, offset, data):
        """Write the values of `data` into `buffer` starting at `offset`."""
        values = tuple(data)
        return struct.pack_into(self.fmt, buffer, offset, *values)

    def unpack_from(self, buffer, offset=0):
        """Read one record from `buffer` at `offset` as a namedtuple."""
        values = struct.unpack_from(self.fmt, buffer, offset)
        return self.type._make(values)

    def size(self):
        """Return the number of bytes one packed record occupies."""
        return struct.calcsize(self.fmt)
|
|
|
|
|
|
def Subtract(nt, **kwargs):
    """Return a copy of namedtuple `nt` with each named field decreased.

    Every keyword names a field of `nt` and gives the amount to subtract from
    it, e.g. Subtract(nt, f=2) yields a new namedtuple whose f is nt.f - 2.
    Fields that are not named are carried over unchanged.
    """
    lowered = {}
    for field, amount in kwargs.items():
        lowered[field] = getattr(nt, field) - amount
    return nt._replace(**lowered)
|
|
|
|
|
|
def MakeDeterministic(objdata):
    """Return a deterministic copy of a COFF object file produced by ml.

    Args:
      objdata: The raw contents of an .obj file written by ml(64).exe, as a
        byte string.

    Returns:
      The rewritten object file contents as a byte string: the COFF
      timestamp is zeroed and the .debug$S section (its header, its data and
      its symbol table entry) is removed, with all affected offsets and
      indices adjusted.

    Raises:
      AssertionError: If the input does not match one of the layout
        assumptions listed below.
    """
    # Takes data produced by ml(64).exe (without any special flags) and
    # 1. Sets the timestamp to 0
    # 2. Strips the .debug$S section (which contains an unwanted absolute
    #    path)

    # This makes several assumptions about ml's output:
    # - Section data is in the same order as the corresponding section
    #   headers: section headers preceding the .debug$S section header have
    #   their data preceding the .debug$S section data; likewise for section
    #   headers following the .debug$S section.
    # - The .debug$S section contains only the absolute path to the obj file
    #   and nothing else, in particular there's only a single entry in the
    #   symbol table referring to the .debug$S section.
    # - There are no COFF line number entries.
    # - There's no IMAGE_SYM_CLASS_CLR_TOKEN symbol.
    # These seem to hold in practice; if they stop holding this script needs
    # to become smarter.

    objdata = array.array('b', objdata)  # Writable, e.g. via struct.pack_into.

    # Read coff header.
    COFFHEADER = Struct('COFFHEADER',
                        'H', 'Machine',
                        'H', 'NumberOfSections',
                        'I', 'TimeDateStamp',
                        'I', 'PointerToSymbolTable',
                        'I', 'NumberOfSymbols',

                        'H', 'SizeOfOptionalHeader',
                        'H', 'Characteristics')
    coff_header = COFFHEADER.unpack_from(objdata)
    assert coff_header.SizeOfOptionalHeader == 0  # Only set for binaries.

    # Read section headers following coff header.
    SECTIONHEADER = Struct('SECTIONHEADER',
                           '8s', 'Name',
                           'I', 'VirtualSize',
                           'I', 'VirtualAddress',

                           'I', 'SizeOfRawData',
                           'I', 'PointerToRawData',
                           'I', 'PointerToRelocations',
                           'I', 'PointerToLineNumbers',

                           'H', 'NumberOfRelocations',
                           'H', 'NumberOfLineNumbers',
                           'I', 'Characteristics')
    section_headers = []
    debug_section_index = -1  # 0-based index of .debug$S; -1 until found.
    for i in range(0, coff_header.NumberOfSections):
        section_header = SECTIONHEADER.unpack_from(
            objdata, offset=COFFHEADER.size() + i * SECTIONHEADER.size())
        # Support short names only.
        assert not section_header[0].startswith(b'/')
        section_headers.append(section_header)

        if section_header.Name == b'.debug$S':
            # There must be exactly one .debug$S section.
            assert debug_section_index == -1
            debug_section_index = i
    assert debug_section_index != -1

    # Offset of the first byte after the section header table.
    data_start = COFFHEADER.size() + len(section_headers) * SECTIONHEADER.size()

    # Verify the .debug$S section looks like we expect.
    assert section_headers[debug_section_index].Name == b'.debug$S'
    assert section_headers[debug_section_index].VirtualSize == 0
    assert section_headers[debug_section_index].VirtualAddress == 0
    debug_size = section_headers[debug_section_index].SizeOfRawData
    debug_offset = section_headers[debug_section_index].PointerToRawData
    assert section_headers[debug_section_index].PointerToRelocations == 0
    assert section_headers[debug_section_index].PointerToLineNumbers == 0
    assert section_headers[debug_section_index].NumberOfRelocations == 0
    assert section_headers[debug_section_index].NumberOfLineNumbers == 0

    # Make sure sections in front of .debug$S have their data preceding it.
    for header in section_headers[:debug_section_index]:
        assert header.PointerToRawData < debug_offset
        assert header.PointerToRelocations < debug_offset
        assert header.PointerToLineNumbers < debug_offset

    # Make sure sections after .debug$S have their data following it.
    for header in section_headers[debug_section_index + 1:]:
        # Later sections must have their raw data after the .debug$S data and
        # must not carry relocations or line numbers.
        assert header.PointerToRawData > debug_offset
        assert header.PointerToRelocations == 0
        assert header.PointerToLineNumbers == 0

    # Make sure the first non-empty section's data starts right after the
    # section headers.
    for section_header in section_headers:
        if section_header.PointerToRawData == 0:
            assert section_header.PointerToRelocations == 0
            assert section_header.PointerToLineNumbers == 0
            continue
        assert section_header.PointerToRawData == data_start
        break

    # Make sure the symbol table (and hence, string table) appear after the
    # last section:
    assert (coff_header.PointerToSymbolTable >=
            section_headers[-1].PointerToRawData +
            section_headers[-1].SizeOfRawData)

    # The symbol table contains a symbol for the no-longer-present .debug$S
    # section. If we leave it there, lld-link will complain:
    #
    #    lld-link: error: .debug$S should not refer to non-existent section 5
    #
    # so we need to remove that symbol table entry as well. This shifts symbol
    # entries around and we need to update symbol table indices in:
    # - relocations
    # - line number records (never present)
    # - one aux symbol entry (IMAGE_SYM_CLASS_CLR_TOKEN; not present in ml
    #   output)
    SYM = Struct('SYM',
                 '8s', 'Name',
                 'I', 'Value',
                 'h', 'SectionNumber',  # Note: Signed!
                 'H', 'Type',

                 'B', 'StorageClass',
                 'B', 'NumberOfAuxSymbols')
    i = 0
    debug_sym = -1  # Symbol table index of the .debug$S symbol; -1 until found.
    while i < coff_header.NumberOfSymbols:
        sym_offset = coff_header.PointerToSymbolTable + i * SYM.size()
        sym = SYM.unpack_from(objdata, sym_offset)

        # 107 is IMAGE_SYM_CLASS_CLR_TOKEN, which has aux entry "CLR Token
        # Definition", which contains a symbol index. Check it's never present.
        assert sym.StorageClass != 107

        # Note: sym.SectionNumber is 1-based, debug_section_index is 0-based.
        if sym.SectionNumber - 1 == debug_section_index:
            assert debug_sym == -1, 'more than one .debug$S symbol found'
            debug_sym = i
            # Make sure the .debug$S symbol looks like we expect.
            # In particular, it should have exactly one aux symbol.
            assert sym.Name == b'.debug$S'
            assert sym.Value == 0
            assert sym.Type == 0
            # 3 is IMAGE_SYM_CLASS_STATIC in the PE/COFF specification.
            assert sym.StorageClass == 3
            assert sym.NumberOfAuxSymbols == 1
        elif sym.SectionNumber > debug_section_index:
            # The equality case was handled above, so this matches only
            # symbols in sections located after .debug$S; their 1-based
            # section number drops by one once .debug$S is gone. Zero and
            # negative section numbers never match and are left untouched.
            sym = Subtract(sym, SectionNumber=1)
            SYM.pack_into(objdata, sym_offset, sym)
        # Aux records occupy symbol table slots too; step over them.
        i += 1 + sym.NumberOfAuxSymbols
    assert debug_sym != -1, '.debug$S symbol not found'

    # Note: Usually the .debug$S section is the last, but for files saying
    # `includelib foo.lib`, like safe_terminate_process.asm in 32-bit builds,
    # this isn't true: .drectve is after .debug$S.

    # Update symbol table indices in relocations.
    # There are a few processor types that have one or two relocation types
    # where SymbolTableIndex has a different meaning, but not for x86.
    REL = Struct('REL',
                 'I', 'VirtualAddress',
                 'I', 'SymbolTableIndex',
                 'H', 'Type')
    # Only sections before .debug$S are visited: sections after it were
    # asserted above to have PointerToRelocations == 0.
    for header in section_headers[0:debug_section_index]:
        for j in range(0, header.NumberOfRelocations):
            rel_offset = header.PointerToRelocations + j * REL.size()
            rel = REL.unpack_from(objdata, rel_offset)
            assert rel.SymbolTableIndex != debug_sym
            if rel.SymbolTableIndex > debug_sym:
                # Two slots disappear: the .debug$S symbol and its single aux
                # entry.
                rel = Subtract(rel, SymbolTableIndex=2)
                REL.pack_into(objdata, rel_offset, rel)

    # Update symbol table indices in line numbers -- just check they don't
    # exist.
    for header in section_headers:
        assert header.NumberOfLineNumbers == 0

    # Now that all indices are updated, remove the symbol table entry
    # referring to .debug$S and its aux entry.
    # Nothing has been deleted yet, so the original offsets are still valid.
    # The symbol table was asserted to lie after all section data, so removing
    # these entries first does not move anything that is edited below.
    del objdata[coff_header.PointerToSymbolTable + debug_sym * SYM.size():
                coff_header.PointerToSymbolTable + (debug_sym + 2) * SYM.size()]

    # Now we know that it's safe to write out the input data, with just the
    # timestamp overwritten to 0, the .debug$S section header cut out (and the
    # file offsets stored in all other section headers decremented by the size
    # of that one section header, plus the size of the .debug$S data for
    # sections following it), and the .debug$S section's data cut out. The
    # symbol table offset needs to be reduced by one section header and the
    # size of the missing section.
    # (The COFF spec only requires on-disk sections to be aligned in image
    # files, for obj files it's not required. If that wasn't the case,
    # deleting slices of data would not generally be safe.)

    # Update section offsets and remove .debug$S section data.
    # Sections before .debug$S only move up by the removed section header.
    for i in range(0, debug_section_index):
        header = section_headers[i]
        if header.SizeOfRawData:
            header = Subtract(header, PointerToRawData=SECTIONHEADER.size())
        if header.NumberOfRelocations:
            header = Subtract(header, PointerToRelocations=SECTIONHEADER.size())
        if header.NumberOfLineNumbers:
            header = Subtract(header, PointerToLineNumbers=SECTIONHEADER.size())
        SECTIONHEADER.pack_into(
            objdata, COFFHEADER.size() + i * SECTIONHEADER.size(), header)
    # Sections after .debug$S additionally move up by the removed section data.
    for i in range(debug_section_index + 1, len(section_headers)):
        header = section_headers[i]
        shift = SECTIONHEADER.size() + debug_size
        if header.SizeOfRawData:
            header = Subtract(header, PointerToRawData=shift)
        if header.NumberOfRelocations:
            header = Subtract(header, PointerToRelocations=shift)
        if header.NumberOfLineNumbers:
            header = Subtract(header, PointerToLineNumbers=shift)
        SECTIONHEADER.pack_into(
            objdata, COFFHEADER.size() + i * SECTIONHEADER.size(), header)

    # The headers are still written at their original positions here; the
    # .debug$S header itself is only cut out at the very end.
    del objdata[debug_offset:debug_offset + debug_size]

    # Finally, remove .debug$S section header and update coff header.
    coff_header = coff_header._replace(TimeDateStamp=0)
    coff_header = Subtract(coff_header,
                           NumberOfSections=1,
                           PointerToSymbolTable=SECTIONHEADER.size() + debug_size,
                           NumberOfSymbols=2)
    COFFHEADER.pack_into(objdata, 0, coff_header)

    del objdata[
        COFFHEADER.size() + debug_section_index * SECTIONHEADER.size():
        COFFHEADER.size() + (debug_section_index + 1) * SECTIONHEADER.size()]

    # All done!
    # array.tostring() is the Python 2 spelling of array.tobytes().
    if sys.version_info.major == 2:
        return objdata.tostring()
    else:
        return objdata.tobytes()
|
|
|
|
|
|
def main():
    """Run the wrapped assembler, then rewrite its .obj deterministically.

    Everything after this script's own name on the command line is executed
    as the assembler command. If that command exits with a non-zero status,
    the status is returned and nothing else happens. Otherwise the object
    file named by the `/Fo<path>` argument is read, passed through
    MakeDeterministic, and written back in place; None is returned.
    """
    command = sys.argv[1:]
    exit_status = subprocess.call(command)
    if exit_status != 0:
        return exit_status

    # Find the output path. If /Fo is given more than once, the last
    # occurrence wins.
    output_flag = '/Fo'
    objfile = None
    for arg in command:
        if arg.startswith(output_flag):
            objfile = arg[len(output_flag):]
    assert objfile, 'failed to find ml output'

    with open(objfile, 'rb') as obj_in:
        objdata = obj_in.read()
    objdata = MakeDeterministic(objdata)
    with open(objfile, 'wb') as obj_out:
        obj_out.write(objdata)
|
|
|
|
|
|
if __name__ == '__main__':
    # Use main()'s return value as the process exit status: the assembler's
    # non-zero status on failure, or None (exit status 0) on success.
    exit_status = main()
    sys.exit(exit_status)
|