summaryrefslogtreecommitdiffstats
path: root/buildtools/wafsamba/symbols.py
diff options
context:
space:
mode:
Diffstat (limited to 'buildtools/wafsamba/symbols.py')
-rw-r--r--buildtools/wafsamba/symbols.py659
1 files changed, 659 insertions, 0 deletions
diff --git a/buildtools/wafsamba/symbols.py b/buildtools/wafsamba/symbols.py
new file mode 100644
index 0000000..99e121c
--- /dev/null
+++ b/buildtools/wafsamba/symbols.py
@@ -0,0 +1,659 @@
+# a waf tool to extract symbols from object files or libraries
+# using nm, producing a set of exposed defined/undefined symbols
+
+import os, re, subprocess
+from waflib import Utils, Build, Options, Logs, Errors
+from waflib.Logs import debug
+from samba_utils import TO_LIST, LOCAL_CACHE, get_tgt_list
+
+# these are the data structures used in symbols.py:
+#
+# bld.env.symbol_map : dictionary mapping public symbol names to list of
+# subsystem names where that symbol exists
+#
+# t.in_library : list of libraries that t is in
+#
+# bld.env.public_symbols: set of public symbols for each subsystem
+# bld.env.used_symbols : set of used symbols for each subsystem
+#
+# bld.env.syslib_symbols: dictionary mapping system library name to set of symbols
+# for that library
+# bld.env.library_dict : dictionary mapping built library paths to subsystem names
+#
+# LOCAL_CACHE(bld, 'TARGET_TYPE') : dictionary mapping subsystem name to target type
+
+
def symbols_extract(bld, objfiles, dynamic=False):
    '''extract symbols from object files or libraries using nm

    bld      -- build context; bld.env.nm_cache caches the result per file
                and is created here if it is missing
    objfiles -- iterable of paths (str or bytes) to pass to nm
    dynamic  -- if True pass -D to nm (needed for some .so files)

    Returns a dictionary mapping each file to a dictionary containing the
    sets of "PUBLIC" and "UNDEFINED" symbols for that file. Symbol names
    are bytes, exactly as printed by nm. Files named on the nm command
    line are keyed by the path object the caller passed in, so callers
    can look results up with their own paths. Any other name nm prints
    as a header (e.g. an archive member) is keyed by the raw bytes.
    '''

    ret = {}

    # see if we can get some results from the nm cache
    if not bld.env.nm_cache:
        bld.env.nm_cache = {}

    remaining = set()
    for obj in set(objfiles):
        if obj in bld.env.nm_cache:
            ret[obj] = bld.env.nm_cache[obj].copy()
        else:
            remaining.add(obj)
    objfiles = remaining

    if not objfiles:
        return ret

    cmd = ["nm"]
    if dynamic:
        # needed for some .so files
        cmd.append("-D")
    cmd.extend(objfiles)

    # nm output is bytes; remember how each path looks in that output so
    # the result can be keyed by the caller's own path object
    by_nm_name = {os.fsencode(obj): obj for obj in objfiles}

    filename = None
    if len(objfiles) == 1:
        # with a single file nm does not print a "file:" header
        filename = next(iter(objfiles))
        ret[filename] = {"PUBLIC": set(), "UNDEFINED": set()}

    # the context manager closes the pipe and reaps the nm process
    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as nm:
        for line in nm.stdout:
            line = line.strip()
            if line.endswith(b':'):
                nm_name = line[:-1]
                filename = by_nm_name.get(nm_name, nm_name)
                ret[filename] = {"PUBLIC": set(), "UNDEFINED": set()}
                continue
            cols = line.split(b" ")
            if len(cols) < 2 or filename is None:
                # blank or unparseable line, or a symbol we cannot
                # attribute to any file
                continue
            # see if the line starts with an address
            if len(cols) == 3:
                symbol_type = cols[1]
                symbol = cols[2]
            else:
                symbol_type = cols[0]
                symbol = cols[1]
            if len(symbol_type) != 1:
                # 'in' on bytes is a substring test, so only accept a
                # single type letter
                continue
            if symbol_type in b"BDGTRVWSi":
                # its a public symbol
                ret[filename]["PUBLIC"].add(symbol)
            elif symbol_type == b"U":
                ret[filename]["UNDEFINED"].add(symbol)

    # add to the cache
    for obj in objfiles:
        if obj in ret:
            bld.env.nm_cache[obj] = ret[obj].copy()
        else:
            bld.env.nm_cache[obj] = {"PUBLIC": set(), "UNDEFINED": set()}

    return ret
+
+
def real_name(name):
    '''return name with a trailing ".objlist" removed

    Only a suffix is stripped; a name that merely contains ".objlist"
    somewhere else is returned unchanged.
    '''
    suffix = ".objlist"
    if name.endswith(suffix):
        return name[:-len(suffix)]
    return name
+
+
def find_ldd_path(bld, libname, binary):
    '''find the path to the syslib we will link against

    bld     -- build context; results are cached in bld.env.syslib_paths
               (keyed by libname) and the path of libc, when seen, is
               saved in bld.env.libc_path
    libname -- prefix of the library file name to look for, str or bytes
    binary  -- path of the binary or library to run ldd on

    Returns the path ldd reports for the library (bytes), or None if no
    line of the ldd output matches. If several lines match the prefix
    the last one wins.
    '''
    if not bld.env.syslib_paths:
        bld.env.syslib_paths = {}
    if libname in bld.env.syslib_paths:
        return bld.env.syslib_paths[libname]

    # ldd output is bytes, and bytes.startswith() rejects a str argument,
    # so compare using a bytes prefix whatever type the caller passed
    prefix = os.fsencode(libname)

    ret = None
    # the context manager closes the pipe and reaps the ldd process
    with subprocess.Popen(['ldd', binary], stdout=subprocess.PIPE) as ldd:
        for line in ldd.stdout:
            cols = line.strip().split(b" ")
            if len(cols) < 3 or cols[1] != b"=>":
                continue
            if cols[0].startswith(b"libc."):
                # save this one too
                bld.env.libc_path = cols[2]
            if cols[0].startswith(prefix):
                ret = cols[2]

    bld.env.syslib_paths[libname] = ret
    return ret
+
+
# some regular expressions for parsing readelf output
re_sharedlib = re.compile(rb'Shared library: \[(.*)\]')
# output from readelf could be `Library rpath` or `Library runpath`
re_rpath = re.compile(rb'Library (rpath|runpath): \[(.*)\]')

def get_libs(bld, binname):
    '''find the list of linked libraries for any binary or library
    binname is the path to the binary/library on disk

    We do this using readelf instead of ldd as we need to avoid recursing
    into system libraries

    Returns a set of resolved library paths. The paths are bytes because
    they are built from the (bytes) output of readelf and ldd. Results
    are cached per binname in bld.env.lib_cache.
    '''

    # see if we can get the result from the ldd cache
    if not bld.env.lib_cache:
        bld.env.lib_cache = {}
    if binname in bld.env.lib_cache:
        return bld.env.lib_cache[binname].copy()

    rpath = []
    libs = set()

    # the context manager closes the pipe and reaps the readelf process
    with subprocess.Popen(['readelf', '--dynamic', binname],
                          stdout=subprocess.PIPE) as readelf:
        for line in readelf.stdout:
            m = re_sharedlib.search(line)
            if m:
                libs.add(m.group(1))
            m = re_rpath.search(line)
            if m:
                # output from Popen is always bytestr even in py3
                rpath.extend(m.group(2).split(b":"))

    ret = set()
    for lib in libs:
        found = False
        for r in rpath:
            path = os.path.join(r, lib)
            if os.path.exists(path):
                ret.add(os.path.realpath(path))
                found = True
                break
        if not found:
            # we didn't find this lib using rpath. It is probably a system
            # library, so to find the path to it we either need to use ldd
            # or we need to start parsing /etc/ld.so.conf* ourselves. We'll
            # use ldd for now, even though it is slow
            path = find_ldd_path(bld, lib, binname)
            if path:
                ret.add(os.path.realpath(path))

    bld.env.lib_cache[binname] = ret.copy()

    return ret
+
+
def get_libs_recursive(bld, binname, seen):
    '''find the recursive list of linked libraries for any binary or library
    binname is the path to the binary/library on disk. seen is a set used
    to prevent loops

    Returns the set of library paths reachable from binname, following
    only libraries that appear in bld.env.library_dict. binname is added
    to seen.
    '''
    if binname in seen:
        return set()
    direct = get_libs(bld, binname)
    seen.add(binname)

    library_dict = bld.env.library_dict
    ret = set(direct)
    for lib in direct:
        # we don't want to recurse into system libraries. If a system
        # library that we use (eg. libcups) happens to use another library
        # (such as libkrb5) which contains common symbols with our own
        # libraries, then that is not an error
        #
        # get_libs() builds its paths from readelf's bytes output, so a
        # path may need decoding before it can match a str key of
        # library_dict
        if lib in library_dict or os.fsdecode(lib) in library_dict:
            ret |= get_libs_recursive(bld, lib, seen)
    return ret
+
+
+
def find_syslib_path(bld, libname, deps):
    '''locate the system library libname as seen by one of its users

    deps is the list of targets that use the library. ldd is run (via
    find_ldd_path) on the link output of the first of them, so the path
    returned is the one that target really resolves to. For "python" the
    configured bld.env.PYTHON_VERSION is appended to the name first.
    '''
    first_user = deps[0]
    linkpath = first_user.link_task.outputs[0].abspath(bld.env)

    if libname == "python":
        libname = libname + bld.env.PYTHON_VERSION

    file_prefix = "lib%s" % libname.lower()
    return find_ldd_path(bld, file_prefix, linkpath)
+
+
def _merge_symbols_by_name(bld, tgt_list, attr):
    '''return a dictionary mapping each real subsystem name to the union
    of the symbol sets stored in attribute attr of its targets. For
    LIBRARY and PLUGIN targets the sets of their add_objects are merged
    in as well.'''
    merged = {}
    for t in tgt_list:
        name = real_name(t.sname)
        if name in merged:
            merged[name] = merged[name].union(getattr(t, attr))
        else:
            merged[name] = getattr(t, attr)
        if t.samba_type in ['LIBRARY', 'PLUGIN']:
            for dep in t.add_objects:
                t2 = bld.get_tgen_by_name(dep)
                bld.ASSERT(t2 is not None, "Library '%s' has unknown dependency '%s'" % (name, dep))
                merged[name] = merged[name].union(getattr(t2, attr))
    return merged


def build_symbol_sets(bld, tgt_list):
    '''build the public_symbols and undefined_symbols attributes for each target

    Every target in tgt_list gets public_symbols, undefined_symbols and
    used_symbols sets, filled from the nm output of its compiled object
    files. The following are then stored on bld.env:

      symbol_map     -- public symbol -> list of subsystem names defining it
      public_symbols -- subsystem name -> set of public symbols
      used_symbols   -- subsystem name -> set of used (undefined) symbols

    Nothing is done if bld.env.public_symbols is already populated.
    '''

    if bld.env.public_symbols:
        return

    objlist = []  # list of object file
    objmap = {}   # map from object filename to the target that builds it

    for t in tgt_list:
        t.public_symbols = set()
        t.undefined_symbols = set()
        t.used_symbols = set()
        for tsk in getattr(t, 'compiled_tasks', []):
            for output in tsk.outputs:
                objpath = output.abspath(bld.env)
                objlist.append(objpath)
                objmap[objpath] = t

    symbols = symbols_extract(bld, objlist)
    for obj in objlist:
        t = objmap[obj]
        t.public_symbols = t.public_symbols.union(symbols[obj]["PUBLIC"])
        t.undefined_symbols = t.undefined_symbols.union(symbols[obj]["UNDEFINED"])
        t.used_symbols = t.used_symbols.union(symbols[obj]["UNDEFINED"])

    # a symbol defined by one object file of a target is not undefined for
    # that target as a whole. This has to be done for every target, not
    # just for whichever one the loop above happened to handle last
    for t in tgt_list:
        t.undefined_symbols = t.undefined_symbols.difference(t.public_symbols)

    # and the reverse map of public symbols to subsystem name
    bld.env.symbol_map = {}

    for t in tgt_list:
        for s in t.public_symbols:
            bld.env.symbol_map.setdefault(s, []).append(real_name(t.sname))

    bld.env.public_symbols = _merge_symbols_by_name(bld, tgt_list, 'public_symbols')
    bld.env.used_symbols = _merge_symbols_by_name(bld, tgt_list, 'used_symbols')
+
+
def build_library_dict(bld, tgt_list):
    '''fill bld.env.library_dict, which maps the resolved path of each
    built LIBRARY, PLUGIN or PYTHON target to the name of that target.
    Does nothing if the dictionary has already been populated.'''

    if bld.env.library_dict:
        return

    bld.env.library_dict = {}

    linked_types = ('LIBRARY', 'PLUGIN', 'PYTHON')
    for t in tgt_list:
        if t.samba_type not in linked_types:
            continue
        built = t.link_task.outputs[0].abspath(bld.env)
        bld.env.library_dict[os.path.realpath(built)] = t.sname
+
+
def build_syslib_sets(bld, tgt_list):
    '''build the public_symbols for all syslibs

    Finds the system libraries named in the uselib of the targets, locates
    them on disk and reads their dynamic symbols with nm. The results are
    stored in bld.env.syslib_symbols (library path -> public symbols) and
    bld.env.libc_symbols, and merged into bld.env.symbol_map and
    bld.env.public_symbols.

    Nothing is done if bld.env.syslib_symbols is already populated.
    '''

    if bld.env.syslib_symbols:
        return

    # work out what syslibs we depend on, and what targets those are used in
    syslibs = {}
    objmap = {}
    for t in tgt_list:
        if getattr(t, 'uselib', []) and t.samba_type in ['LIBRARY', 'PLUGIN', 'BINARY', 'PYTHON']:
            for lib in t.uselib:
                if lib in ['PYEMBED', 'PYEXT']:
                    lib = "python"
                syslibs.setdefault(lib, []).append(t)

    # work out the paths to each syslib
    syslib_paths = []
    for lib in syslibs:
        path = find_syslib_path(bld, lib, syslibs[lib])
        if path is None:
            Logs.warn("Unable to find syslib path for %s" % lib)
            continue
        syslib_paths.append(path)
        objmap[path] = lib.lower()

    # add in libc. Its path is only recorded as a side effect of running
    # ldd, so it may be missing (for example when no target uses a syslib)
    libc_path = bld.env.libc_path
    if libc_path:
        syslib_paths.append(libc_path)
        objmap[libc_path] = 'c'
    else:
        Logs.warn("Unable to find syslib path for libc")

    symbols = symbols_extract(bld, syslib_paths, dynamic=True)

    # keep a map of syslib names to public symbols
    bld.env.syslib_symbols = {}
    for lib in symbols:
        bld.env.syslib_symbols[lib] = symbols[lib]["PUBLIC"]

    # add to the map of symbols to dependencies
    for lib in symbols:
        for sym in symbols[lib]["PUBLIC"]:
            bld.env.symbol_map.setdefault(sym, []).append(objmap[lib])

    # keep the libc symbols as well, as these are useful for some of the
    # sanity checks
    if libc_path and libc_path in symbols:
        bld.env.libc_symbols = symbols[libc_path]["PUBLIC"]
    else:
        bld.env.libc_symbols = set()

    # add to the combined map of dependency name to public_symbols
    for lib in bld.env.syslib_symbols:
        bld.env.public_symbols[objmap[lib]] = bld.env.syslib_symbols[lib]
+
+
def build_autodeps(bld, t):
    '''work out which dependencies target t needs, judging by the symbols
    it leaves undefined, and store them as the set t.autodeps

    Each undefined symbol is looked up in bld.env.symbol_map and the
    first subsystem listed for it is considered. Subsystems that belong
    to exactly one library are replaced by that library.
    '''
    autodeps = set()
    own_name = real_name(t.sname)

    targets = LOCAL_CACHE(bld, 'TARGET_TYPE')

    for sym in t.undefined_symbols:
        if sym in t.public_symbols:
            continue
        if sym not in bld.env.symbol_map:
            continue

        providers = bld.env.symbol_map[sym]
        if providers == [own_name]:
            # self dependencies aren't interesting
            continue
        if t.in_library == providers:
            # no need to depend on the library we are part of
            continue

        first = providers[0]
        if first in ('c', 'python'):
            # these don't go into autodeps
            continue
        if targets[first] == 'SYSLIB':
            autodeps.add(first)
            continue

        provider = bld.get_tgen_by_name(first)
        if len(provider.in_library) != 1:
            autodeps.add(first)
        elif provider.in_library != t.in_library:
            # targets in the same library as us need no autodep, the
            # others are reached through the library that holds them
            autodeps.add(provider.in_library[0])

    t.autodeps = autodeps
+
+
def build_library_names(bld, tgt_list):
    '''give every target an in_library list naming the libraries and
    plugins that contain it

    Every target starts with an empty list. For each LIBRARY or PLUGIN
    target, the SUBSYSTEM, BUILTIN and ASN1 targets found among its
    samba_deps_extended get the library name appended (once). The work is
    done only once per build, guarded by bld.env.done_build_library_names.
    '''

    if bld.env.done_build_library_names:
        return

    for t in tgt_list:
        t.in_library = []

    container_types = ('LIBRARY', 'PLUGIN')
    member_types = ('SUBSYSTEM', 'BUILTIN', 'ASN1')

    for container in tgt_list:
        if container.samba_type not in container_types:
            continue
        for depname in container.samba_deps_extended:
            member = bld.get_tgen_by_name(depname)
            if not member:
                continue
            if member.samba_type not in member_types:
                continue
            if container.sname not in member.in_library:
                member.in_library.append(container.sname)

    bld.env.done_build_library_names = True
+
+
def check_library_deps(bld, t):
    '''warn about mutual dependencies that cross a library boundary

    For every autodep of t that in turn lists t among its own autodeps,
    a warning is logged unless both targets have the same in_library
    list. A target that sits in more than one library is also warned
    about. Nothing is raised.
    '''

    name = real_name(t.sname)

    if len(t.in_library) > 1:
        Logs.warn("WARNING: Target '%s' in multiple libraries: %s" % (t.sname, t.in_library))

    for dep in t.autodeps:
        other = bld.get_tgen_by_name(dep)
        if other is None:
            continue
        for back_dep in other.autodeps:
            if back_dep != name or t.in_library == other.in_library:
                continue
            # only warn for now, this is not treated as a fatal error
            Logs.warn("WARNING: mutual dependency %s <=> %s" % (name, real_name(other.sname)))
            Logs.warn("Libraries should match. %s != %s" % (t.in_library, other.in_library))
+
+
def check_syslib_collisions(bld, tgt_list):
    '''fail if any target defines a symbol that a system library also has

    We do not want any code in Samba to use a symbol name from a
    system library. The chance of that causing problems is just too
    high. Note that libreplace uses a rep_XX approach of renaming
    symbols via macros

    Every collision is logged first; a WafError is raised at the end if
    there was at least one.
    '''

    collided = False
    for t in tgt_list:
        for lib in bld.env.syslib_symbols:
            common = t.public_symbols.intersection(bld.env.syslib_symbols[lib])
            if not common:
                continue
            Logs.error("ERROR: Target '%s' has symbols '%s' which is also in syslib '%s'" % (t.sname, common, lib))
            collided = True

    if collided:
        raise Errors.WafError("symbols in common with system libraries")
+
+
def check_dependencies(bld, t):
    '''check for dependencies that should be changed

    The undefined symbols of t are matched against the public symbols of
    each of its samba_deps. A declared dependency that supplies none of
    them is reported (unless t is a LIBRARY), and so is any other
    subsystem that would supply a symbol still missing. Symbols nobody
    provides are stored in t.unsatisfied_symbols.

    Targets that have a matching ".objlist" target are skipped.
    '''

    if bld.get_tgen_by_name(t.sname + ".objlist"):
        return

    targets = LOCAL_CACHE(bld, 'TARGET_TYPE')

    remaining = t.undefined_symbols.difference(t.public_symbols)

    sname = real_name(t.sname)

    for d in t.samba_deps:
        if targets[d] in ['EMPTY', 'DISABLED', 'SYSLIB', 'GENERATOR']:
            continue
        bld.ASSERT(d in bld.env.public_symbols, "Failed to find symbol list for dependency '%s'" % d)
        diff = remaining.intersection(bld.env.public_symbols[d])
        if not diff and targets[sname] != 'LIBRARY':
            Logs.info("Target '%s' has no dependency on %s" % (sname, d))
        else:
            remaining = remaining.difference(diff)

    t.unsatisfied_symbols = set()
    needed = {}
    for sym in remaining:
        if sym in bld.env.symbol_map:
            dep = bld.env.symbol_map[sym]
            needed.setdefault(dep[0], set()).add(sym)
        else:
            t.unsatisfied_symbols.add(sym)

    for dep in needed:
        # symbol names read from nm are bytes and str.join() refuses
        # those, so decode them for display
        symtext = " ".join(sorted(
            s.decode('utf-8', 'replace') if isinstance(s, bytes) else s
            for s in needed[dep]))
        Logs.info("Target '%s' should add dep '%s' for symbols %s" % (sname, dep, symtext))
+
+
+
def check_syslib_dependencies(bld, t):
    '''check for syslib dependencies

    Looks at t.unsatisfied_symbols and reports every system library
    (other than libc) that would supply one of them. Symbols that nothing
    provides are listed in a debug message. For python targets the
    symbols of the python library are removed from
    t.unsatisfied_symbols first.

    Targets that have a matching ".objlist" target are skipped.
    '''

    if bld.get_tgen_by_name(t.sname + ".objlist"):
        return

    sname = real_name(t.sname)

    remaining = set()

    features = TO_LIST(t.features)
    if 'pyembed' in features or 'pyext' in features:
        if 'python' in bld.env.public_symbols:
            t.unsatisfied_symbols = t.unsatisfied_symbols.difference(bld.env.public_symbols['python'])

    needed = {}
    for sym in t.unsatisfied_symbols:
        if sym in bld.env.symbol_map:
            dep = bld.env.symbol_map[sym][0]
            if dep == 'c':
                continue
            needed.setdefault(dep, set()).add(sym)
        else:
            remaining.add(sym)

    # symbol names read from nm are bytes and str.join() refuses those,
    # so decode them for display
    def symtext(syms):
        return " ".join(sorted(
            s.decode('utf-8', 'replace') if isinstance(s, bytes) else s
            for s in syms))

    for dep in needed:
        Logs.info("Target '%s' should add syslib dep '%s' for symbols %s" % (sname, dep, symtext(needed[dep])))

    if remaining:
        debug("deps: Target '%s' has unsatisfied symbols: %s" % (sname, symtext(remaining)))
+
def symbols_symbolcheck(task):
    '''waf rule: check the internal dependency lists

    Builds the symbol sets and library membership for all targets,
    computes the autodeps of every target that has sources, then runs
    check_dependencies over all targets followed by check_library_deps
    over all targets.
    '''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    build_symbol_sets(bld, all_targets)
    build_library_names(bld, all_targets)

    for tgt in all_targets:
        tgt.autodeps = set()
        if getattr(tgt, 'source', ''):
            build_autodeps(bld, tgt)

    # each check is run over the complete list before the next one starts
    for check in (check_dependencies, check_library_deps):
        for tgt in all_targets:
            check(bld, tgt)
+
def symbols_syslibcheck(task):
    '''waf rule: check the syslib dependencies

    Builds the symbol sets of the system libraries, fails on symbol
    collisions with them, then checks the syslib dependencies of every
    target.
    '''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    build_syslib_sets(bld, all_targets)
    check_syslib_collisions(bld, all_targets)

    for tgt in all_targets:
        check_syslib_dependencies(bld, tgt)
+
+
def symbols_whyneeded(task):
    """waf rule: report why 'target' needs to link to 'subsystem'

    The pair is read from Options.options.WHYNEEDED in the form
    TARGET:DEPENDENCY; anything else raises WafError. The symbols that
    the target uses and the dependency defines are logged.
    """
    bld = task.env.bld
    tgt_list = get_tgt_list(bld)

    parts = Options.options.WHYNEEDED.split(":")
    if len(parts) != 2:
        raise Errors.WafError("usage: WHYNEEDED=TARGET:DEPENDENCY")
    target, subsystem = parts

    build_symbol_sets(bld, tgt_list)
    build_library_names(bld, tgt_list)
    build_syslib_sets(bld, tgt_list)

    Logs.info("Checking why %s needs to link to %s" % (target, subsystem))

    if target not in bld.env.used_symbols:
        Logs.warn("unable to find target '%s' in used_symbols dict" % target)
        return
    if subsystem not in bld.env.public_symbols:
        Logs.warn("unable to find subsystem '%s' in public_symbols dict" % subsystem)
        return

    used = bld.env.used_symbols[target]
    overlap = used.intersection(bld.env.public_symbols[subsystem])
    if overlap:
        Logs.info("target '%s' uses symbols %s from '%s'" % (target, overlap, subsystem))
    else:
        Logs.info("target '%s' doesn't use any public symbols from '%s'" % (target, subsystem))
+
+
def report_duplicate(bld, binname, sym, libs, fail_on_error):
    '''report duplicated symbols

    binname       -- the binary being checked, used in the message
    sym           -- the duplicated symbol, str or bytes
    libs          -- the library paths that all define sym
    fail_on_error -- raise WafError instead of printing the message

    A few linker-provided symbols are always ignored. Library paths
    found in bld.env.library_dict are shown by their subsystem name.
    '''
    # symbols read from nm are bytes, so decode before comparing with the
    # (str) ignore list and before formatting the message
    symname = sym.decode('utf-8', 'replace') if isinstance(sym, bytes) else sym
    if symname in ('_init', '_fini', '_edata', '_end', '__bss_start'):
        return

    library_dict = bld.env.library_dict
    libnames = []
    for lib in libs:
        if lib in library_dict:
            libnames.append(library_dict[lib])
        elif os.fsdecode(lib) in library_dict:
            # a bytes path that matches a str key of library_dict
            libnames.append(library_dict[os.fsdecode(lib)])
        else:
            libnames.append(lib)

    message = "%s: Symbol %s linked in multiple libraries %s" % (binname, symname, libnames)
    if fail_on_error:
        raise Errors.WafError(message)
    print(message)
+
+
def symbols_dupcheck_binary(bld, binname, fail_on_error):
    '''check for duplicated symbols in one binary

    Collects the public dynamic symbols of every library binname links
    to (recursively) and reports each symbol defined by more than one of
    them, provided at least one of the definers is a library listed in
    bld.env.library_dict. fail_on_error is passed on to
    report_duplicate.
    '''

    libs = get_libs_recursive(bld, binname, set())
    symlist = symbols_extract(bld, libs, dynamic=True)

    # symbols and library paths read from nm/readelf are bytes, so accept
    # both forms when comparing against str values
    ignored = ('_GLOBAL_OFFSET_TABLE_', b'_GLOBAL_OFFSET_TABLE_')

    symmap = {}
    for libpath in symlist:
        for sym in symlist[libpath]['PUBLIC']:
            if sym in ignored:
                continue
            symmap.setdefault(sym, set()).add(libpath)

    library_dict = bld.env.library_dict
    for sym, definers in symmap.items():
        if len(definers) < 2:
            continue
        # only report if at least one of the libraries is one of ours
        for libpath in definers:
            if libpath in library_dict or os.fsdecode(libpath) in library_dict:
                report_duplicate(bld, binname, sym, definers, fail_on_error)
                break
+
def symbols_dupcheck(task, fail_on_error=False):
    '''waf rule: look for symbols defined by more than one of the
    libraries that a BINARY target links to

    With fail_on_error a duplicate raises an error instead of just being
    printed.
    '''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    # NOTE(review): the result is not used below; the call is kept as the
    # original made it - confirm whether it can be dropped
    targets = LOCAL_CACHE(bld, 'TARGET_TYPE')

    build_library_dict(bld, all_targets)

    here = os.getcwd
    for tgt in all_targets:
        if tgt.samba_type != 'BINARY':
            continue
        built = tgt.link_task.outputs[0].abspath(bld.env)
        binname = os.path.relpath(built, here())
        symbols_dupcheck_binary(bld, binname, fail_on_error)
+
+
def symbols_dupcheck_fatal(task):
    '''waf rule: same check as symbols_dupcheck, but any duplicate that
    is found makes the check fail instead of only being printed'''
    must_fail = True
    symbols_dupcheck(task, fail_on_error=must_fail)
+
+
def SYMBOL_CHECK(bld):
    '''schedule the symbol checking tasks selected on the command line

    With Options.options.SYMBOLCHECK the symbol, syslib and duplicate
    symbol checks are scheduled; with Options.options.WHYNEEDED the
    "why needed" report is scheduled. Each task gets the build context
    stored in its env so the rule can find it.
    '''
    # NOTE(review): the two options are treated as independent of each
    # other; confirm against the original indentation
    scheduled = []
    if Options.options.SYMBOLCHECK:
        scheduled.append(('symbolcheck', symbols_symbolcheck, 'symbol checking'))
        scheduled.append(('syslibcheck', symbols_syslibcheck, 'syslib checking'))
        scheduled.append(('syslibcheck', symbols_dupcheck, 'symbol duplicate checking'))
    if Options.options.WHYNEEDED:
        scheduled.append(('syslibcheck', symbols_whyneeded, 'check why a dependency is needed'))

    for group, rule, label in scheduled:
        bld.SET_BUILD_GROUP(group)
        task = bld(rule=rule, always=True, name=label)
        task.env.bld = bld


Build.BuildContext.SYMBOL_CHECK = SYMBOL_CHECK
+
def DUP_SYMBOL_CHECK(bld):
    '''check for duplicate symbols

    Schedules the fatal duplicate symbol check, but only when
    Options.options.DUP_SYMBOLCHECK is set and bld.env.DEVELOPER is true.
    '''
    if Options.options.DUP_SYMBOLCHECK and bld.env.DEVELOPER:
        bld.SET_BUILD_GROUP('syslibcheck')
        task = bld(rule=symbols_dupcheck_fatal, always=True, name='symbol duplicate checking')
        task.env.bld = bld

Build.BuildContext.DUP_SYMBOL_CHECK = DUP_SYMBOL_CHECK