diff options
Diffstat (limited to '')
-rw-r--r-- | buildtools/wafsamba/symbols.py | 659 |
1 files changed, 659 insertions, 0 deletions
diff --git a/buildtools/wafsamba/symbols.py b/buildtools/wafsamba/symbols.py new file mode 100644 index 0000000..a6af1ac --- /dev/null +++ b/buildtools/wafsamba/symbols.py @@ -0,0 +1,659 @@ +# a waf tool to extract symbols from object files or libraries +# using nm, producing a set of exposed defined/undefined symbols + +import os, re, subprocess +from waflib import Utils, Build, Options, Logs, Errors +from waflib.Logs import debug +from samba_utils import TO_LIST, LOCAL_CACHE, get_tgt_list + +# these are the data structures used in symbols.py: +# +# bld.env.symbol_map : dictionary mapping public symbol names to list of +# subsystem names where that symbol exists +# +# t.in_library : list of libraries that t is in +# +# bld.env.public_symbols: set of public symbols for each subsystem +# bld.env.used_symbols : set of used symbols for each subsystem +# +# bld.env.syslib_symbols: dictionary mapping system library name to set of symbols +# for that library +# bld.env.library_dict : dictionary mapping built library paths to subsystem names +# +# LOCAL_CACHE(bld, 'TARGET_TYPE') : dictionary mapping subsystem name to target type + + +def symbols_extract(bld, objfiles, dynamic=False): + '''extract symbols from objfile, returning a dictionary containing + the set of undefined and public symbols for each file''' + + ret = {} + + # see if we can get some results from the nm cache + if not bld.env.nm_cache: + bld.env.nm_cache = {} + + objfiles = set(objfiles).copy() + + remaining = set() + for obj in objfiles: + if obj in bld.env.nm_cache: + ret[obj] = bld.env.nm_cache[obj].copy() + else: + remaining.add(obj) + objfiles = remaining + + if len(objfiles) == 0: + return ret + + cmd = ["nm"] + if dynamic: + # needed for some .so files + cmd.append("-D") + cmd.extend(list(objfiles)) + + nmpipe = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout + if len(objfiles) == 1: + filename = list(objfiles)[0] + ret[filename] = { "PUBLIC": set(), "UNDEFINED" : set()} + + for line in nmpipe: + line = line.strip() + if line.endswith(b':'): + filename = line[:-1] + ret[filename] = { "PUBLIC": set(), "UNDEFINED" : set() } + continue + cols = line.split(b" ") + if cols == [b'']: + continue + # see if the line starts with an address + if len(cols) == 3: + symbol_type = cols[1] + symbol = cols[2] + else: + symbol_type = cols[0] + symbol = cols[1] + if symbol_type in b"BDGTRVWSi": + # its a public symbol + ret[filename]["PUBLIC"].add(symbol) + elif symbol_type in b"U": + ret[filename]["UNDEFINED"].add(symbol) + + # add to the cache + for obj in objfiles: + if obj in ret: + bld.env.nm_cache[obj] = ret[obj].copy() + else: + bld.env.nm_cache[obj] = { "PUBLIC": set(), "UNDEFINED" : set() } + + return ret + + +def real_name(name): + if name.find(".objlist") != -1: + name = name[:-8] + return name + + +def find_ldd_path(bld, libname, binary): + '''find the path to the syslib we will link against''' + ret = None + if not bld.env.syslib_paths: + bld.env.syslib_paths = {} + if libname in bld.env.syslib_paths: + return bld.env.syslib_paths[libname] + + lddpipe = subprocess.Popen(['ldd', binary], stdout=subprocess.PIPE).stdout + for line in lddpipe: + line = line.strip() + cols = line.split(b" ") + if len(cols) < 3 or cols[1] != b"=>": + continue + if cols[0].startswith(b"libc."): + # save this one too + bld.env.libc_path = cols[2] + if cols[0].startswith(libname): + ret = cols[2] + bld.env.syslib_paths[libname] = ret + return ret + + +# some regular expressions for parsing readelf output +re_sharedlib = re.compile(b'Shared library: \[(.*)\]') +# output from readelf could be `Library rpath` or `Libray runpath` +re_rpath = re.compile(b'Library (rpath|runpath): \[(.*)\]') + +def get_libs(bld, binname): + '''find the list of linked libraries for any binary or library + binname is the path to the binary/library on disk + + We do this using readelf instead of ldd as we need to avoid recursing + into system libraries + ''' + + # see if we can get the result from the ldd cache + if not bld.env.lib_cache: + bld.env.lib_cache = {} + if binname in bld.env.lib_cache: + return bld.env.lib_cache[binname].copy() + + rpath = [] + libs = set() + + elfpipe = subprocess.Popen(['readelf', '--dynamic', binname], stdout=subprocess.PIPE).stdout + for line in elfpipe: + m = re_sharedlib.search(line) + if m: + libs.add(m.group(1)) + m = re_rpath.search(line) + if m: + # output from Popen is always bytestr even in py3 + rpath.extend(m.group(2).split(b":")) + + ret = set() + for lib in libs: + found = False + for r in rpath: + path = os.path.join(r, lib) + if os.path.exists(path): + ret.add(os.path.realpath(path)) + found = True + break + if not found: + # we didn't find this lib using rpath. It is probably a system + # library, so to find the path to it we either need to use ldd + # or we need to start parsing /etc/ld.so.conf* ourselves. We'll + # use ldd for now, even though it is slow + path = find_ldd_path(bld, lib, binname) + if path: + ret.add(os.path.realpath(path)) + + bld.env.lib_cache[binname] = ret.copy() + + return ret + + +def get_libs_recursive(bld, binname, seen): + '''find the recursive list of linked libraries for any binary or library + binname is the path to the binary/library on disk. seen is a set used + to prevent loops + ''' + if binname in seen: + return set() + ret = get_libs(bld, binname) + seen.add(binname) + for lib in ret: + # we don't want to recurse into system libraries. If a system + # library that we use (eg. libcups) happens to use another library + # (such as libkrb5) which contains common symbols with our own + # libraries, then that is not an error + if lib in bld.env.library_dict: + ret = ret.union(get_libs_recursive(bld, lib, seen)) + return ret + + + +def find_syslib_path(bld, libname, deps): + '''find the path to the syslib we will link against''' + # the strategy is to use the targets that depend on the library, and run ldd + # on it to find the real location of the library that is used + + linkpath = deps[0].link_task.outputs[0].abspath(bld.env) + + if libname == "python": + libname += bld.env.PYTHON_VERSION + + return find_ldd_path(bld, "lib%s" % libname.lower(), linkpath) + + +def build_symbol_sets(bld, tgt_list): + '''build the public_symbols and undefined_symbols attributes for each target''' + + if bld.env.public_symbols: + return + + objlist = [] # list of object file + objmap = {} # map from object filename to target (subsystem) name + + for t in tgt_list: + t.public_symbols = set() + t.undefined_symbols = set() + t.used_symbols = set() + for tsk in getattr(t, 'compiled_tasks', []): + for output in tsk.outputs: + objpath = output.abspath(bld.env) + objlist.append(objpath) + objmap[objpath] = t + + symbols = symbols_extract(bld, objlist) + for obj in objlist: + t = objmap[obj] + t.public_symbols = t.public_symbols.union(symbols[obj]["PUBLIC"]) + t.undefined_symbols = t.undefined_symbols.union(symbols[obj]["UNDEFINED"]) + t.used_symbols = t.used_symbols.union(symbols[obj]["UNDEFINED"]) + + t.undefined_symbols = t.undefined_symbols.difference(t.public_symbols) + + # and the reverse map of public symbols to subsystem name + bld.env.symbol_map = {} + + for t in tgt_list: + for s in t.public_symbols: + if not s in bld.env.symbol_map: + bld.env.symbol_map[s] = [] + bld.env.symbol_map[s].append(real_name(t.sname)) + + targets = LOCAL_CACHE(bld, 'TARGET_TYPE') + + bld.env.public_symbols = {} + for t in tgt_list: + name = real_name(t.sname) + if name in bld.env.public_symbols: + bld.env.public_symbols[name] = bld.env.public_symbols[name].union(t.public_symbols) + else: + bld.env.public_symbols[name] = t.public_symbols + if t.samba_type in ['LIBRARY', 'PLUGIN']: + for dep in t.add_objects: + t2 = bld.get_tgen_by_name(dep) + bld.ASSERT(t2 is not None, "Library '%s' has unknown dependency '%s'" % (name, dep)) + bld.env.public_symbols[name] = bld.env.public_symbols[name].union(t2.public_symbols) + + bld.env.used_symbols = {} + for t in tgt_list: + name = real_name(t.sname) + if name in bld.env.used_symbols: + bld.env.used_symbols[name] = bld.env.used_symbols[name].union(t.used_symbols) + else: + bld.env.used_symbols[name] = t.used_symbols + if t.samba_type in ['LIBRARY', 'PLUGIN']: + for dep in t.add_objects: + t2 = bld.get_tgen_by_name(dep) + bld.ASSERT(t2 is not None, "Library '%s' has unknown dependency '%s'" % (name, dep)) + bld.env.used_symbols[name] = bld.env.used_symbols[name].union(t2.used_symbols) + + +def build_library_dict(bld, tgt_list): + '''build the library_dict dictionary''' + + if bld.env.library_dict: + return + + bld.env.library_dict = {} + + for t in tgt_list: + if t.samba_type in [ 'LIBRARY', 'PLUGIN', 'PYTHON' ]: + linkpath = os.path.realpath(t.link_task.outputs[0].abspath(bld.env)) + bld.env.library_dict[linkpath] = t.sname + + +def build_syslib_sets(bld, tgt_list): + '''build the public_symbols for all syslibs''' + + if bld.env.syslib_symbols: + return + + # work out what syslibs we depend on, and what targets those are used in + syslibs = {} + objmap = {} + for t in tgt_list: + if getattr(t, 'uselib', []) and t.samba_type in [ 'LIBRARY', 'PLUGIN', 'BINARY', 'PYTHON' ]: + for lib in t.uselib: + if lib in ['PYEMBED', 'PYEXT']: + lib = "python" + if not lib in syslibs: + syslibs[lib] = [] + syslibs[lib].append(t) + + # work out the paths to each syslib + syslib_paths = [] + for lib in syslibs: + path = find_syslib_path(bld, lib, syslibs[lib]) + if path is None: + Logs.warn("Unable to find syslib path for %s" % lib) + if path is not None: + syslib_paths.append(path) + objmap[path] = lib.lower() + + # add in libc + syslib_paths.append(bld.env.libc_path) + objmap[bld.env.libc_path] = 'c' + + symbols = symbols_extract(bld, syslib_paths, dynamic=True) + + # keep a map of syslib names to public symbols + bld.env.syslib_symbols = {} + for lib in symbols: + bld.env.syslib_symbols[lib] = symbols[lib]["PUBLIC"] + + # add to the map of symbols to dependencies + for lib in symbols: + for sym in symbols[lib]["PUBLIC"]: + if not sym in bld.env.symbol_map: + bld.env.symbol_map[sym] = [] + bld.env.symbol_map[sym].append(objmap[lib]) + + # keep the libc symbols as well, as these are useful for some of the + # sanity checks + bld.env.libc_symbols = symbols[bld.env.libc_path]["PUBLIC"] + + # add to the combined map of dependency name to public_symbols + for lib in bld.env.syslib_symbols: + bld.env.public_symbols[objmap[lib]] = bld.env.syslib_symbols[lib] + + +def build_autodeps(bld, t): + '''build the set of dependencies for a target''' + deps = set() + name = real_name(t.sname) + + targets = LOCAL_CACHE(bld, 'TARGET_TYPE') + + for sym in t.undefined_symbols: + if sym in t.public_symbols: + continue + if sym in bld.env.symbol_map: + depname = bld.env.symbol_map[sym] + if depname == [ name ]: + # self dependencies aren't interesting + continue + if t.in_library == depname: + # no need to depend on the library we are part of + continue + if depname[0] in ['c', 'python']: + # these don't go into autodeps + continue + if targets[depname[0]] in [ 'SYSLIB' ]: + deps.add(depname[0]) + continue + t2 = bld.get_tgen_by_name(depname[0]) + if len(t2.in_library) != 1: + deps.add(depname[0]) + continue + if t2.in_library == t.in_library: + # if we're part of the same library, we don't need to autodep + continue + deps.add(t2.in_library[0]) + t.autodeps = deps + + +def build_library_names(bld, tgt_list): + '''add a in_library attribute to all targets that are part of a library''' + + if bld.env.done_build_library_names: + return + + for t in tgt_list: + t.in_library = [] + + for t in tgt_list: + if t.samba_type in ['LIBRARY', 'PLUGIN']: + for obj in t.samba_deps_extended: + t2 = bld.get_tgen_by_name(obj) + if t2 and t2.samba_type in [ 'SUBSYSTEM', 'BUILTIN', 'ASN1' ]: + if not t.sname in t2.in_library: + t2.in_library.append(t.sname) + bld.env.done_build_library_names = True + + +def check_library_deps(bld, t): + '''check that all the autodeps that have mutual dependency of this + target are in the same library as the target''' + + name = real_name(t.sname) + + if len(t.in_library) > 1: + Logs.warn("WARNING: Target '%s' in multiple libraries: %s" % (t.sname, t.in_library)) + + for dep in t.autodeps: + t2 = bld.get_tgen_by_name(dep) + if t2 is None: + continue + for dep2 in t2.autodeps: + if dep2 == name and t.in_library != t2.in_library: + Logs.warn("WARNING: mutual dependency %s <=> %s" % (name, real_name(t2.sname))) + Logs.warn("Libraries should match. %s != %s" % (t.in_library, t2.in_library)) + # raise Errors.WafError("illegal mutual dependency") + + +def check_syslib_collisions(bld, tgt_list): + '''check if a target has any symbol collisions with a syslib + + We do not want any code in Samba to use a symbol name from a + system library. The chance of that causing problems is just too + high. Note that libreplace uses a rep_XX approach of renaming + symbols via macros + ''' + + has_error = False + for t in tgt_list: + for lib in bld.env.syslib_symbols: + common = t.public_symbols.intersection(bld.env.syslib_symbols[lib]) + if common: + Logs.error("ERROR: Target '%s' has symbols '%s' which is also in syslib '%s'" % (t.sname, common, lib)) + has_error = True + if has_error: + raise Errors.WafError("symbols in common with system libraries") + + +def check_dependencies(bld, t): + '''check for depenencies that should be changed''' + + if bld.get_tgen_by_name(t.sname + ".objlist"): + return + + targets = LOCAL_CACHE(bld, 'TARGET_TYPE') + + remaining = t.undefined_symbols.copy() + remaining = remaining.difference(t.public_symbols) + + sname = real_name(t.sname) + + deps = set(t.samba_deps) + for d in t.samba_deps: + if targets[d] in [ 'EMPTY', 'DISABLED', 'SYSLIB', 'GENERATOR' ]: + continue + bld.ASSERT(d in bld.env.public_symbols, "Failed to find symbol list for dependency '%s'" % d) + diff = remaining.intersection(bld.env.public_symbols[d]) + if not diff and targets[sname] != 'LIBRARY': + Logs.info("Target '%s' has no dependency on %s" % (sname, d)) + else: + remaining = remaining.difference(diff) + + t.unsatisfied_symbols = set() + needed = {} + for sym in remaining: + if sym in bld.env.symbol_map: + dep = bld.env.symbol_map[sym] + if not dep[0] in needed: + needed[dep[0]] = set() + needed[dep[0]].add(sym) + else: + t.unsatisfied_symbols.add(sym) + + for dep in needed: + Logs.info("Target '%s' should add dep '%s' for symbols %s" % (sname, dep, " ".join(needed[dep]))) + + + +def check_syslib_dependencies(bld, t): + '''check for syslib depenencies''' + + if bld.get_tgen_by_name(t.sname + ".objlist"): + return + + sname = real_name(t.sname) + + remaining = set() + + features = TO_LIST(t.features) + if 'pyembed' in features or 'pyext' in features: + if 'python' in bld.env.public_symbols: + t.unsatisfied_symbols = t.unsatisfied_symbols.difference(bld.env.public_symbols['python']) + + needed = {} + for sym in t.unsatisfied_symbols: + if sym in bld.env.symbol_map: + dep = bld.env.symbol_map[sym][0] + if dep == 'c': + continue + if not dep in needed: + needed[dep] = set() + needed[dep].add(sym) + else: + remaining.add(sym) + + for dep in needed: + Logs.info("Target '%s' should add syslib dep '%s' for symbols %s" % (sname, dep, " ".join(needed[dep]))) + + if remaining: + debug("deps: Target '%s' has unsatisfied symbols: %s" % (sname, " ".join(remaining))) + + + +def symbols_symbolcheck(task): + '''check the internal dependency lists''' + bld = task.env.bld + tgt_list = get_tgt_list(bld) + + build_symbol_sets(bld, tgt_list) + build_library_names(bld, tgt_list) + + for t in tgt_list: + t.autodeps = set() + if getattr(t, 'source', ''): + build_autodeps(bld, t) + + for t in tgt_list: + check_dependencies(bld, t) + + for t in tgt_list: + check_library_deps(bld, t) + +def symbols_syslibcheck(task): + '''check the syslib dependencies''' + bld = task.env.bld + tgt_list = get_tgt_list(bld) + + build_syslib_sets(bld, tgt_list) + check_syslib_collisions(bld, tgt_list) + + for t in tgt_list: + check_syslib_dependencies(bld, t) + + +def symbols_whyneeded(task): + """check why 'target' needs to link to 'subsystem'""" + bld = task.env.bld + tgt_list = get_tgt_list(bld) + + why = Options.options.WHYNEEDED.split(":") + if len(why) != 2: + raise Errors.WafError("usage: WHYNEEDED=TARGET:DEPENDENCY") + target = why[0] + subsystem = why[1] + + build_symbol_sets(bld, tgt_list) + build_library_names(bld, tgt_list) + build_syslib_sets(bld, tgt_list) + + Logs.info("Checking why %s needs to link to %s" % (target, subsystem)) + if not target in bld.env.used_symbols: + Logs.warn("unable to find target '%s' in used_symbols dict" % target) + return + if not subsystem in bld.env.public_symbols: + Logs.warn("unable to find subsystem '%s' in public_symbols dict" % subsystem) + return + overlap = bld.env.used_symbols[target].intersection(bld.env.public_symbols[subsystem]) + if not overlap: + Logs.info("target '%s' doesn't use any public symbols from '%s'" % (target, subsystem)) + else: + Logs.info("target '%s' uses symbols %s from '%s'" % (target, overlap, subsystem)) + + +def report_duplicate(bld, binname, sym, libs, fail_on_error): + '''report duplicated symbols''' + if sym in ['_init', '_fini', '_edata', '_end', '__bss_start']: + return + libnames = [] + for lib in libs: + if lib in bld.env.library_dict: + libnames.append(bld.env.library_dict[lib]) + else: + libnames.append(lib) + if fail_on_error: + raise Errors.WafError("%s: Symbol %s linked in multiple libraries %s" % (binname, sym, libnames)) + else: + print("%s: Symbol %s linked in multiple libraries %s" % (binname, sym, libnames)) + + +def symbols_dupcheck_binary(bld, binname, fail_on_error): + '''check for duplicated symbols in one binary''' + + libs = get_libs_recursive(bld, binname, set()) + symlist = symbols_extract(bld, libs, dynamic=True) + + symmap = {} + for libpath in symlist: + for sym in symlist[libpath]['PUBLIC']: + if sym == '_GLOBAL_OFFSET_TABLE_': + continue + if not sym in symmap: + symmap[sym] = set() + symmap[sym].add(libpath) + for sym in symmap: + if len(symmap[sym]) > 1: + for libpath in symmap[sym]: + if libpath in bld.env.library_dict: + report_duplicate(bld, binname, sym, symmap[sym], fail_on_error) + break + +def symbols_dupcheck(task, fail_on_error=False): + '''check for symbols defined in two different subsystems''' + bld = task.env.bld + tgt_list = get_tgt_list(bld) + + targets = LOCAL_CACHE(bld, 'TARGET_TYPE') + + build_library_dict(bld, tgt_list) + for t in tgt_list: + if t.samba_type == 'BINARY': + binname = os.path.relpath(t.link_task.outputs[0].abspath(bld.env), os.getcwd()) + symbols_dupcheck_binary(bld, binname, fail_on_error) + + +def symbols_dupcheck_fatal(task): + '''check for symbols defined in two different subsystems (and fail if duplicates are found)''' + symbols_dupcheck(task, fail_on_error=True) + + +def SYMBOL_CHECK(bld): + '''check our dependency lists''' + if Options.options.SYMBOLCHECK: + bld.SET_BUILD_GROUP('symbolcheck') + task = bld(rule=symbols_symbolcheck, always=True, name='symbol checking') + task.env.bld = bld + + bld.SET_BUILD_GROUP('syslibcheck') + task = bld(rule=symbols_syslibcheck, always=True, name='syslib checking') + task.env.bld = bld + + bld.SET_BUILD_GROUP('syslibcheck') + task = bld(rule=symbols_dupcheck, always=True, name='symbol duplicate checking') + task.env.bld = bld + + if Options.options.WHYNEEDED: + bld.SET_BUILD_GROUP('syslibcheck') + task = bld(rule=symbols_whyneeded, always=True, name='check why a dependency is needed') + task.env.bld = bld + + +Build.BuildContext.SYMBOL_CHECK = SYMBOL_CHECK + +def DUP_SYMBOL_CHECK(bld): + if Options.options.DUP_SYMBOLCHECK and bld.env.DEVELOPER: + '''check for duplicate symbols''' + bld.SET_BUILD_GROUP('syslibcheck') + task = bld(rule=symbols_dupcheck_fatal, always=True, name='symbol duplicate checking') + task.env.bld = bld + +Build.BuildContext.DUP_SYMBOL_CHECK = DUP_SYMBOL_CHECK |