# a waf tool to extract symbols from object files or libraries # using nm, producing a set of exposed defined/undefined symbols import os, re, subprocess from waflib import Utils, Build, Options, Logs, Errors from waflib.Logs import debug from samba_utils import TO_LIST, LOCAL_CACHE, get_tgt_list # these are the data structures used in symbols.py: # # bld.env.symbol_map : dictionary mapping public symbol names to list of # subsystem names where that symbol exists # # t.in_library : list of libraries that t is in # # bld.env.public_symbols: set of public symbols for each subsystem # bld.env.used_symbols : set of used symbols for each subsystem # # bld.env.syslib_symbols: dictionary mapping system library name to set of symbols # for that library # bld.env.library_dict : dictionary mapping built library paths to subsystem names # # LOCAL_CACHE(bld, 'TARGET_TYPE') : dictionary mapping subsystem name to target type def symbols_extract(bld, objfiles, dynamic=False): '''extract symbols from objfile, returning a dictionary containing the set of undefined and public symbols for each file''' ret = {} # see if we can get some results from the nm cache if not bld.env.nm_cache: bld.env.nm_cache = {} objfiles = set(objfiles).copy() remaining = set() for obj in objfiles: if obj in bld.env.nm_cache: ret[obj] = bld.env.nm_cache[obj].copy() else: remaining.add(obj) objfiles = remaining if len(objfiles) == 0: return ret cmd = ["nm"] if dynamic: # needed for some .so files cmd.append("-D") cmd.extend(list(objfiles)) nmpipe = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout if len(objfiles) == 1: filename = list(objfiles)[0] ret[filename] = { "PUBLIC": set(), "UNDEFINED" : set()} for line in nmpipe: line = line.strip() if line.endswith(b':'): filename = line[:-1] ret[filename] = { "PUBLIC": set(), "UNDEFINED" : set() } continue cols = line.split(b" ") if cols == [b'']: continue # see if the line starts with an address if len(cols) == 3: symbol_type = cols[1] symbol = cols[2] else: symbol_type = cols[0] symbol = cols[1] if symbol_type in b"BDGTRVWSi": # its a public symbol ret[filename]["PUBLIC"].add(symbol) elif symbol_type in b"U": ret[filename]["UNDEFINED"].add(symbol) # add to the cache for obj in objfiles: if obj in ret: bld.env.nm_cache[obj] = ret[obj].copy() else: bld.env.nm_cache[obj] = { "PUBLIC": set(), "UNDEFINED" : set() } return ret def real_name(name): if name.find(".objlist") != -1: name = name[:-8] return name def find_ldd_path(bld, libname, binary): '''find the path to the syslib we will link against''' ret = None if not bld.env.syslib_paths: bld.env.syslib_paths = {} if libname in bld.env.syslib_paths: return bld.env.syslib_paths[libname] lddpipe = subprocess.Popen(['ldd', binary], stdout=subprocess.PIPE).stdout for line in lddpipe: line = line.strip() cols = line.split(b" ") if len(cols) < 3 or cols[1] != b"=>": continue if cols[0].startswith(b"libc."): # save this one too bld.env.libc_path = cols[2] if cols[0].startswith(libname): ret = cols[2] bld.env.syslib_paths[libname] = ret return ret # some regular expressions for parsing readelf output re_sharedlib = re.compile(rb'Shared library: \[(.*)\]') # output from readelf could be `Library rpath` or `Libray runpath` re_rpath = re.compile(rb'Library (rpath|runpath): \[(.*)\]') def get_libs(bld, binname): '''find the list of linked libraries for any binary or library binname is the path to the binary/library on disk We do this using readelf instead of ldd as we need to avoid recursing into system libraries ''' # see if we can get the result from the ldd cache if not bld.env.lib_cache: bld.env.lib_cache = {} if binname in bld.env.lib_cache: return bld.env.lib_cache[binname].copy() rpath = [] libs = set() elfpipe = subprocess.Popen(['readelf', '--dynamic', binname], stdout=subprocess.PIPE).stdout for line in elfpipe: m = re_sharedlib.search(line) if m: libs.add(m.group(1)) m = re_rpath.search(line) if m: # output from Popen is always bytestr even in py3 rpath.extend(m.group(2).split(b":")) ret = set() for lib in libs: found = False for r in rpath: path = os.path.join(r, lib) if os.path.exists(path): ret.add(os.path.realpath(path)) found = True break if not found: # we didn't find this lib using rpath. It is probably a system # library, so to find the path to it we either need to use ldd # or we need to start parsing /etc/ld.so.conf* ourselves. We'll # use ldd for now, even though it is slow path = find_ldd_path(bld, lib, binname) if path: ret.add(os.path.realpath(path)) bld.env.lib_cache[binname] = ret.copy() return ret def get_libs_recursive(bld, binname, seen): '''find the recursive list of linked libraries for any binary or library binname is the path to the binary/library on disk. seen is a set used to prevent loops ''' if binname in seen: return set() ret = get_libs(bld, binname) seen.add(binname) for lib in ret: # we don't want to recurse into system libraries. If a system # library that we use (eg. libcups) happens to use another library # (such as libkrb5) which contains common symbols with our own # libraries, then that is not an error if lib in bld.env.library_dict: ret = ret.union(get_libs_recursive(bld, lib, seen)) return ret def find_syslib_path(bld, libname, deps): '''find the path to the syslib we will link against''' # the strategy is to use the targets that depend on the library, and run ldd # on it to find the real location of the library that is used linkpath = deps[0].link_task.outputs[0].abspath(bld.env) if libname == "python": libname += bld.env.PYTHON_VERSION return find_ldd_path(bld, "lib%s" % libname.lower(), linkpath) def build_symbol_sets(bld, tgt_list): '''build the public_symbols and undefined_symbols attributes for each target''' if bld.env.public_symbols: return objlist = [] # list of object file objmap = {} # map from object filename to target (subsystem) name for t in tgt_list: t.public_symbols = set() t.undefined_symbols = set() t.used_symbols = set() for tsk in getattr(t, 'compiled_tasks', []): for output in tsk.outputs: objpath = output.abspath(bld.env) objlist.append(objpath) objmap[objpath] = t symbols = symbols_extract(bld, objlist) for obj in objlist: t = objmap[obj] t.public_symbols = t.public_symbols.union(symbols[obj]["PUBLIC"]) t.undefined_symbols = t.undefined_symbols.union(symbols[obj]["UNDEFINED"]) t.used_symbols = t.used_symbols.union(symbols[obj]["UNDEFINED"]) t.undefined_symbols = t.undefined_symbols.difference(t.public_symbols) # and the reverse map of public symbols to subsystem name bld.env.symbol_map = {} for t in tgt_list: for s in t.public_symbols: if not s in bld.env.symbol_map: bld.env.symbol_map[s] = [] bld.env.symbol_map[s].append(real_name(t.sname)) targets = LOCAL_CACHE(bld, 'TARGET_TYPE') bld.env.public_symbols = {} for t in tgt_list: name = real_name(t.sname) if name in bld.env.public_symbols: bld.env.public_symbols[name] = bld.env.public_symbols[name].union(t.public_symbols) else: bld.env.public_symbols[name] = t.public_symbols if t.samba_type in ['LIBRARY', 'PLUGIN']: for dep in t.add_objects: t2 = bld.get_tgen_by_name(dep) bld.ASSERT(t2 is not None, "Library '%s' has unknown dependency '%s'" % (name, dep)) bld.env.public_symbols[name] = bld.env.public_symbols[name].union(t2.public_symbols) bld.env.used_symbols = {} for t in tgt_list: name = real_name(t.sname) if name in bld.env.used_symbols: bld.env.used_symbols[name] = bld.env.used_symbols[name].union(t.used_symbols) else: bld.env.used_symbols[name] = t.used_symbols if t.samba_type in ['LIBRARY', 'PLUGIN']: for dep in t.add_objects: t2 = bld.get_tgen_by_name(dep) bld.ASSERT(t2 is not None, "Library '%s' has unknown dependency '%s'" % (name, dep)) bld.env.used_symbols[name] = bld.env.used_symbols[name].union(t2.used_symbols) def build_library_dict(bld, tgt_list): '''build the library_dict dictionary''' if bld.env.library_dict: return bld.env.library_dict = {} for t in tgt_list: if t.samba_type in [ 'LIBRARY', 'PLUGIN', 'PYTHON' ]: linkpath = os.path.realpath(t.link_task.outputs[0].abspath(bld.env)) bld.env.library_dict[linkpath] = t.sname def build_syslib_sets(bld, tgt_list): '''build the public_symbols for all syslibs''' if bld.env.syslib_symbols: return # work out what syslibs we depend on, and what targets those are used in syslibs = {} objmap = {} for t in tgt_list: if getattr(t, 'uselib', []) and t.samba_type in [ 'LIBRARY', 'PLUGIN', 'BINARY', 'PYTHON' ]: for lib in t.uselib: if lib in ['PYEMBED', 'PYEXT']: lib = "python" if not lib in syslibs: syslibs[lib] = [] syslibs[lib].append(t) # work out the paths to each syslib syslib_paths = [] for lib in syslibs: path = find_syslib_path(bld, lib, syslibs[lib]) if path is None: Logs.warn("Unable to find syslib path for %s" % lib) if path is not None: syslib_paths.append(path) objmap[path] = lib.lower() # add in libc syslib_paths.append(bld.env.libc_path) objmap[bld.env.libc_path] = 'c' symbols = symbols_extract(bld, syslib_paths, dynamic=True) # keep a map of syslib names to public symbols bld.env.syslib_symbols = {} for lib in symbols: bld.env.syslib_symbols[lib] = symbols[lib]["PUBLIC"] # add to the map of symbols to dependencies for lib in symbols: for sym in symbols[lib]["PUBLIC"]: if not sym in bld.env.symbol_map: bld.env.symbol_map[sym] = [] bld.env.symbol_map[sym].append(objmap[lib]) # keep the libc symbols as well, as these are useful for some of the # sanity checks bld.env.libc_symbols = symbols[bld.env.libc_path]["PUBLIC"] # add to the combined map of dependency name to public_symbols for lib in bld.env.syslib_symbols: bld.env.public_symbols[objmap[lib]] = bld.env.syslib_symbols[lib] def build_autodeps(bld, t): '''build the set of dependencies for a target''' deps = set() name = real_name(t.sname) targets = LOCAL_CACHE(bld, 'TARGET_TYPE') for sym in t.undefined_symbols: if sym in t.public_symbols: continue if sym in bld.env.symbol_map: depname = bld.env.symbol_map[sym] if depname == [ name ]: # self dependencies aren't interesting continue if t.in_library == depname: # no need to depend on the library we are part of continue if depname[0] in ['c', 'python']: # these don't go into autodeps continue if targets[depname[0]] in [ 'SYSLIB' ]: deps.add(depname[0]) continue t2 = bld.get_tgen_by_name(depname[0]) if len(t2.in_library) != 1: deps.add(depname[0]) continue if t2.in_library == t.in_library: # if we're part of the same library, we don't need to autodep continue deps.add(t2.in_library[0]) t.autodeps = deps def build_library_names(bld, tgt_list): '''add a in_library attribute to all targets that are part of a library''' if bld.env.done_build_library_names: return for t in tgt_list: t.in_library = [] for t in tgt_list: if t.samba_type in ['LIBRARY', 'PLUGIN']: for obj in t.samba_deps_extended: t2 = bld.get_tgen_by_name(obj) if t2 and t2.samba_type in [ 'SUBSYSTEM', 'BUILTIN', 'ASN1' ]: if not t.sname in t2.in_library: t2.in_library.append(t.sname) bld.env.done_build_library_names = True def check_library_deps(bld, t): '''check that all the autodeps that have mutual dependency of this target are in the same library as the target''' name = real_name(t.sname) if len(t.in_library) > 1: Logs.warn("WARNING: Target '%s' in multiple libraries: %s" % (t.sname, t.in_library)) for dep in t.autodeps: t2 = bld.get_tgen_by_name(dep) if t2 is None: continue for dep2 in t2.autodeps: if dep2 == name and t.in_library != t2.in_library: Logs.warn("WARNING: mutual dependency %s <=> %s" % (name, real_name(t2.sname))) Logs.warn("Libraries should match. %s != %s" % (t.in_library, t2.in_library)) # raise Errors.WafError("illegal mutual dependency") def check_syslib_collisions(bld, tgt_list): '''check if a target has any symbol collisions with a syslib We do not want any code in Samba to use a symbol name from a system library. The chance of that causing problems is just too high. Note that libreplace uses a rep_XX approach of renaming symbols via macros ''' has_error = False for t in tgt_list: for lib in bld.env.syslib_symbols: common = t.public_symbols.intersection(bld.env.syslib_symbols[lib]) if common: Logs.error("ERROR: Target '%s' has symbols '%s' which is also in syslib '%s'" % (t.sname, common, lib)) has_error = True if has_error: raise Errors.WafError("symbols in common with system libraries") def check_dependencies(bld, t): '''check for dependencies that should be changed''' if bld.get_tgen_by_name(t.sname + ".objlist"): return targets = LOCAL_CACHE(bld, 'TARGET_TYPE') remaining = t.undefined_symbols.copy() remaining = remaining.difference(t.public_symbols) sname = real_name(t.sname) deps = set(t.samba_deps) for d in t.samba_deps: if targets[d] in [ 'EMPTY', 'DISABLED', 'SYSLIB', 'GENERATOR' ]: continue bld.ASSERT(d in bld.env.public_symbols, "Failed to find symbol list for dependency '%s'" % d) diff = remaining.intersection(bld.env.public_symbols[d]) if not diff and targets[sname] != 'LIBRARY': Logs.info("Target '%s' has no dependency on %s" % (sname, d)) else: remaining = remaining.difference(diff) t.unsatisfied_symbols = set() needed = {} for sym in remaining: if sym in bld.env.symbol_map: dep = bld.env.symbol_map[sym] if not dep[0] in needed: needed[dep[0]] = set() needed[dep[0]].add(sym) else: t.unsatisfied_symbols.add(sym) for dep in needed: Logs.info("Target '%s' should add dep '%s' for symbols %s" % (sname, dep, " ".join(needed[dep]))) def check_syslib_dependencies(bld, t): '''check for syslib depenencies''' if bld.get_tgen_by_name(t.sname + ".objlist"): return sname = real_name(t.sname) remaining = set() features = TO_LIST(t.features) if 'pyembed' in features or 'pyext' in features: if 'python' in bld.env.public_symbols: t.unsatisfied_symbols = t.unsatisfied_symbols.difference(bld.env.public_symbols['python']) needed = {} for sym in t.unsatisfied_symbols: if sym in bld.env.symbol_map: dep = bld.env.symbol_map[sym][0] if dep == 'c': continue if not dep in needed: needed[dep] = set() needed[dep].add(sym) else: remaining.add(sym) for dep in needed: Logs.info("Target '%s' should add syslib dep '%s' for symbols %s" % (sname, dep, " ".join(needed[dep]))) if remaining: debug("deps: Target '%s' has unsatisfied symbols: %s" % (sname, " ".join(remaining))) def symbols_symbolcheck(task): '''check the internal dependency lists''' bld = task.env.bld tgt_list = get_tgt_list(bld) build_symbol_sets(bld, tgt_list) build_library_names(bld, tgt_list) for t in tgt_list: t.autodeps = set() if getattr(t, 'source', ''): build_autodeps(bld, t) for t in tgt_list: check_dependencies(bld, t) for t in tgt_list: check_library_deps(bld, t) def symbols_syslibcheck(task): '''check the syslib dependencies''' bld = task.env.bld tgt_list = get_tgt_list(bld) build_syslib_sets(bld, tgt_list) check_syslib_collisions(bld, tgt_list) for t in tgt_list: check_syslib_dependencies(bld, t) def symbols_whyneeded(task): """check why 'target' needs to link to 'subsystem'""" bld = task.env.bld tgt_list = get_tgt_list(bld) why = Options.options.WHYNEEDED.split(":") if len(why) != 2: raise Errors.WafError("usage: WHYNEEDED=TARGET:DEPENDENCY") target = why[0] subsystem = why[1] build_symbol_sets(bld, tgt_list) build_library_names(bld, tgt_list) build_syslib_sets(bld, tgt_list) Logs.info("Checking why %s needs to link to %s" % (target, subsystem)) if not target in bld.env.used_symbols: Logs.warn("unable to find target '%s' in used_symbols dict" % target) return if not subsystem in bld.env.public_symbols: Logs.warn("unable to find subsystem '%s' in public_symbols dict" % subsystem) return overlap = bld.env.used_symbols[target].intersection(bld.env.public_symbols[subsystem]) if not overlap: Logs.info("target '%s' doesn't use any public symbols from '%s'" % (target, subsystem)) else: Logs.info("target '%s' uses symbols %s from '%s'" % (target, overlap, subsystem)) def report_duplicate(bld, binname, sym, libs, fail_on_error): '''report duplicated symbols''' if sym in ['_init', '_fini', '_edata', '_end', '__bss_start']: return libnames = [] for lib in libs: if lib in bld.env.library_dict: libnames.append(bld.env.library_dict[lib]) else: libnames.append(lib) if fail_on_error: raise Errors.WafError("%s: Symbol %s linked in multiple libraries %s" % (binname, sym, libnames)) else: print("%s: Symbol %s linked in multiple libraries %s" % (binname, sym, libnames)) def symbols_dupcheck_binary(bld, binname, fail_on_error): '''check for duplicated symbols in one binary''' libs = get_libs_recursive(bld, binname, set()) symlist = symbols_extract(bld, libs, dynamic=True) symmap = {} for libpath in symlist: for sym in symlist[libpath]['PUBLIC']: if sym == '_GLOBAL_OFFSET_TABLE_': continue if not sym in symmap: symmap[sym] = set() symmap[sym].add(libpath) for sym in symmap: if len(symmap[sym]) > 1: for libpath in symmap[sym]: if libpath in bld.env.library_dict: report_duplicate(bld, binname, sym, symmap[sym], fail_on_error) break def symbols_dupcheck(task, fail_on_error=False): '''check for symbols defined in two different subsystems''' bld = task.env.bld tgt_list = get_tgt_list(bld) targets = LOCAL_CACHE(bld, 'TARGET_TYPE') build_library_dict(bld, tgt_list) for t in tgt_list: if t.samba_type == 'BINARY': binname = os.path.relpath(t.link_task.outputs[0].abspath(bld.env), os.getcwd()) symbols_dupcheck_binary(bld, binname, fail_on_error) def symbols_dupcheck_fatal(task): '''check for symbols defined in two different subsystems (and fail if duplicates are found)''' symbols_dupcheck(task, fail_on_error=True) def SYMBOL_CHECK(bld): '''check our dependency lists''' if Options.options.SYMBOLCHECK: bld.SET_BUILD_GROUP('symbolcheck') task = bld(rule=symbols_symbolcheck, always=True, name='symbol checking') task.env.bld = bld bld.SET_BUILD_GROUP('syslibcheck') task = bld(rule=symbols_syslibcheck, always=True, name='syslib checking') task.env.bld = bld bld.SET_BUILD_GROUP('syslibcheck') task = bld(rule=symbols_dupcheck, always=True, name='symbol duplicate checking') task.env.bld = bld if Options.options.WHYNEEDED: bld.SET_BUILD_GROUP('syslibcheck') task = bld(rule=symbols_whyneeded, always=True, name='check why a dependency is needed') task.env.bld = bld Build.BuildContext.SYMBOL_CHECK = SYMBOL_CHECK def DUP_SYMBOL_CHECK(bld): if Options.options.DUP_SYMBOLCHECK and bld.env.DEVELOPER: '''check for duplicate symbols''' bld.SET_BUILD_GROUP('syslibcheck') task = bld(rule=symbols_dupcheck_fatal, always=True, name='symbol duplicate checking') task.env.bld = bld Build.BuildContext.DUP_SYMBOL_CHECK = DUP_SYMBOL_CHECK