diff options
Diffstat (limited to 'testing/mozbase/mozgeckoprofiler')
10 files changed, 1409 insertions, 0 deletions
diff --git a/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/__init__.py b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/__init__.py new file mode 100644 index 0000000000..ce0337db09 --- /dev/null +++ b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/__init__.py @@ -0,0 +1,17 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +mozgeckoprofiler has utilities to symbolicate and load gecko profiles. +""" +from .profiling import save_gecko_profile, symbolicate_profile_json +from .symbolication import ProfileSymbolicator +from .viewgeckoprofile import view_gecko_profile + +__all__ = [ + "save_gecko_profile", + "symbolicate_profile_json", + "ProfileSymbolicator", + "view_gecko_profile", +] diff --git a/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/dump_syms_mac b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/dump_syms_mac Binary files differnew file mode 100755 index 0000000000..e9b8edf879 --- /dev/null +++ b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/dump_syms_mac diff --git a/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/profiling.py b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/profiling.py new file mode 100644 index 0000000000..fca7465d23 --- /dev/null +++ b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/profiling.py @@ -0,0 +1,85 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+import json +import os +import shutil +import tempfile + +from mozlog import get_proxy_logger + +from .symbolication import ProfileSymbolicator + +LOG = get_proxy_logger("profiler") + + +def save_gecko_profile(profile, filename): + with open(filename, "w") as f: + json.dump(profile, f) + + +def symbolicate_profile_json(profile_path, objdir_path): + """ + Symbolicate a single JSON profile. + """ + temp_dir = tempfile.mkdtemp() + missing_symbols_zip = os.path.join(temp_dir, "missingsymbols.zip") + + firefox_symbol_path = os.path.join(objdir_path, "dist", "crashreporter-symbols") + if not os.path.isdir(firefox_symbol_path): + os.mkdir(firefox_symbol_path) + + windows_symbol_path = os.path.join(temp_dir, "windows") + os.mkdir(windows_symbol_path) + + symbol_paths = {"FIREFOX": firefox_symbol_path, "WINDOWS": windows_symbol_path} + + symbolicator = ProfileSymbolicator( + { + # Trace-level logging (verbose) + "enableTracing": 0, + # Fallback server if symbol is not found locally + "remoteSymbolServer": "https://symbols.mozilla.org/symbolicate/v4", + # Maximum number of symbol files to keep in memory + "maxCacheEntries": 2000000, + # Frequency of checking for recent symbols to + # cache (in hours) + "prefetchInterval": 12, + # Oldest file age to prefetch (in hours) + "prefetchThreshold": 48, + # Maximum number of library versions to pre-fetch + # per library + "prefetchMaxSymbolsPerLib": 3, + # Default symbol lookup directories + "defaultApp": "FIREFOX", + "defaultOs": "WINDOWS", + # Paths to .SYM files, expressed internally as a + # mapping of app or platform names to directories + # Note: App & OS names from requests are converted + # to all-uppercase internally + "symbolPaths": symbol_paths, + } + ) + + LOG.info( + "Symbolicating the performance profile... This could take a couple " + "of minutes." 
+ ) + + try: + with open(profile_path, "r", encoding="utf-8") as profile_file: + profile = json.load(profile_file) + symbolicator.dump_and_integrate_missing_symbols(profile, missing_symbols_zip) + symbolicator.symbolicate_profile(profile) + # Overwrite the profile in place. + save_gecko_profile(profile, profile_path) + except MemoryError: + LOG.error( + "Ran out of memory while trying" + " to symbolicate profile {0}".format(profile_path) + ) + except Exception as e: + LOG.error("Encountered an exception during profile symbolication") + LOG.error(e) + + shutil.rmtree(temp_dir) diff --git a/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/symFileManager.py b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/symFileManager.py new file mode 100644 index 0000000000..e93cbc7b02 --- /dev/null +++ b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/symFileManager.py @@ -0,0 +1,353 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +import itertools +import os +import re +import threading +import time +from bisect import bisect + +from mozlog import get_proxy_logger + +LOG = get_proxy_logger("profiler") + +# Libraries to keep prefetched +PREFETCHED_LIBS = ["xul.pdb", "firefox.pdb"] + + +class SymbolInfo: + def __init__(self, addressMap): + self.sortedAddresses = sorted(addressMap.keys()) + self.sortedSymbols = [addressMap[address] for address in self.sortedAddresses] + self.entryCount = len(self.sortedAddresses) + + # TODO: Add checks for address < funcEnd ? + def Lookup(self, address): + nearest = bisect(self.sortedAddresses, address) - 1 + if nearest < 0: + return None + return self.sortedSymbols[nearest] + + def GetEntryCount(self): + return self.entryCount + + +# Singleton for .sym / .nmsym file cache management + + +class SymFileManager: + """This class fetches symbols from files and caches the results. 
+ + options (obj) + symbolPaths : dictionary + Paths to .SYM files, expressed internally as a mapping of app or platform + names to directories. App & OS names from requests are converted to + all-uppercase internally + e.g. { "FIREFOX": "/tmp/path" } + maxCacheEntries : number + Maximum number of symbol files to keep in memory + prefetchInterval : number + Frequency of checking for recent symbols to cache (in hours) + prefetchThreshold : number + Oldest file age to prefetch (in hours) + prefetchMaxSymbolsPerLib : (number) + Maximum number of library versions to pre-fetch per library + """ + + sCache = {} + sCacheCount = 0 + sCacheLock = threading.Lock() + sMruSymbols = [] + + sOptions = {} + sCallbackTimer = None + + def __init__(self, options): + self.sOptions = options + + def GetLibSymbolMap(self, libName, breakpadId, symbolSources): + # Empty lib name means client couldn't associate frame with any lib + if libName == "": + return None + + # Check cache first + libSymbolMap = None + self.sCacheLock.acquire() + try: + if libName in self.sCache and breakpadId in self.sCache[libName]: + libSymbolMap = self.sCache[libName][breakpadId] + self.UpdateMruList(libName, breakpadId) + finally: + self.sCacheLock.release() + + if libSymbolMap is None: + LOG.debug("Need to fetch PDB file for " + libName + " " + breakpadId) + + # Guess the name of the .sym or .nmsym file on disk + if libName[-4:] == ".pdb": + symFileNameWithoutExtension = re.sub(r"\.[^\.]+$", "", libName) + else: + symFileNameWithoutExtension = libName + + # Look in the symbol dirs for this .sym or .nmsym file + for extension, source in itertools.product( + [".sym", ".nmsym"], symbolSources + ): + symFileName = symFileNameWithoutExtension + extension + pathSuffix = ( + os.sep + libName + os.sep + breakpadId + os.sep + symFileName + ) + path = self.sOptions["symbolPaths"][source] + pathSuffix + libSymbolMap = self.FetchSymbolsFromFile(path) + if libSymbolMap: + break + + if not libSymbolMap: + LOG.debug("No 
matching sym files, tried " + str(symbolSources)) + return None + + LOG.debug( + "Storing libSymbolMap under [" + libName + "][" + breakpadId + "]" + ) + self.sCacheLock.acquire() + try: + self.MaybeEvict(libSymbolMap.GetEntryCount()) + if libName not in self.sCache: + self.sCache[libName] = {} + self.sCache[libName][breakpadId] = libSymbolMap + self.sCacheCount += libSymbolMap.GetEntryCount() + self.UpdateMruList(libName, breakpadId) + LOG.debug( + str(self.sCacheCount) + + " symbols in cache after fetching symbol file" + ) + finally: + self.sCacheLock.release() + + return libSymbolMap + + def FetchSymbolsFromFile(self, path): + try: + symFile = open(path, "r") + except Exception as e: + LOG.debug("Error opening file " + path + ": " + str(e)) + return None + + LOG.debug("Parsing SYM file at " + path) + + try: + symbolMap = {} + lineNum = 0 + publicCount = 0 + funcCount = 0 + if path.endswith(".sym"): + for line in symFile: + lineNum += 1 + if line[0:7] == "PUBLIC ": + line = line.rstrip() + fields = line.split(" ") + if len(fields) < 4: + LOG.debug("Line " + str(lineNum) + " is messed") + continue + if fields[1] == "m": + address = int(fields[2], 16) + symbolMap[address] = " ".join(fields[4:]) + else: + address = int(fields[1], 16) + symbolMap[address] = " ".join(fields[3:]) + publicCount += 1 + elif line[0:5] == "FUNC ": + line = line.rstrip() + fields = line.split(" ") + if len(fields) < 5: + LOG.debug("Line " + str(lineNum) + " is messed") + continue + if fields[1] == "m": + address = int(fields[2], 16) + symbolMap[address] = " ".join(fields[5:]) + else: + address = int(fields[1], 16) + symbolMap[address] = " ".join(fields[4:]) + funcCount += 1 + elif path.endswith(".nmsym"): + addressLength = 0 + for line in symFile: + lineNum += 1 + if line.startswith(" "): + continue + if addressLength == 0: + addressLength = line.find(" ") + address = int(line[0:addressLength], 16) + # Some lines have the form + # "address space letter space symbol", + # some have the form 
"address space symbol". + # The letter has a meaning, but we ignore it. + if line[addressLength + 2] == " ": + symbol = line[addressLength + 3 :].rstrip() + else: + symbol = line[addressLength + 1 :].rstrip() + symbolMap[address] = symbol + publicCount += 1 + except Exception: + LOG.error("Error parsing SYM file " + path) + return None + + logString = "Found " + str(len(symbolMap)) + " unique entries from " + logString += ( + str(publicCount) + " PUBLIC lines, " + str(funcCount) + " FUNC lines" + ) + LOG.debug(logString) + + return SymbolInfo(symbolMap) + + def PrefetchRecentSymbolFiles(self): + """This method runs in a loop. Use the options "prefetchThreshold" to adjust""" + global PREFETCHED_LIBS + + LOG.info("Prefetching recent symbol files") + # Schedule next timer callback + interval = self.sOptions["prefetchInterval"] * 60 * 60 + self.sCallbackTimer = threading.Timer(interval, self.PrefetchRecentSymbolFiles) + self.sCallbackTimer.start() + + thresholdTime = time.time() - self.sOptions["prefetchThreshold"] * 60 * 60 + symDirsToInspect = {} + for pdbName in PREFETCHED_LIBS: + symDirsToInspect[pdbName] = [] + topLibPath = self.sOptions["symbolPaths"]["FIREFOX"] + os.sep + pdbName + + try: + symbolDirs = os.listdir(topLibPath) + for symbolDir in symbolDirs: + candidatePath = topLibPath + os.sep + symbolDir + mtime = os.path.getmtime(candidatePath) + if mtime > thresholdTime: + symDirsToInspect[pdbName].append((mtime, candidatePath)) + except Exception as e: + LOG.error("Error while pre-fetching: " + str(e)) + + LOG.info( + "Found " + + str(len(symDirsToInspect[pdbName])) + + " new " + + pdbName + + " recent dirs" + ) + + # Only prefetch the most recent N entries + symDirsToInspect[pdbName].sort(reverse=True) + symDirsToInspect[pdbName] = symDirsToInspect[pdbName][ + : self.sOptions["prefetchMaxSymbolsPerLib"] + ] + + # Don't fetch symbols already in cache. 
+ # Ideally, mutex would be held from check to insert in self.sCache, + # but we don't want to hold the lock during I/O. This won't cause + # inconsistencies. + self.sCacheLock.acquire() + try: + for pdbName in symDirsToInspect: + for (mtime, symbolDirPath) in symDirsToInspect[pdbName]: + pdbId = os.path.basename(symbolDirPath) + if pdbName in self.sCache and pdbId in self.sCache[pdbName]: + symDirsToInspect[pdbName].remove((mtime, symbolDirPath)) + finally: + self.sCacheLock.release() + + # Read all new symbol files in at once + fetchedSymbols = {} + fetchedCount = 0 + for pdbName in symDirsToInspect: + # The corresponding symbol file name ends with .sym + symFileName = re.sub(r"\.[^\.]+$", ".sym", pdbName) + + for (mtime, symbolDirPath) in symDirsToInspect[pdbName]: + pdbId = os.path.basename(symbolDirPath) + symbolFilePath = symbolDirPath + os.sep + symFileName + symbolInfo = self.FetchSymbolsFromFile(symbolFilePath) + if symbolInfo: + # Stop if the prefetched items are bigger than the cache + if ( + fetchedCount + symbolInfo.GetEntryCount() + > self.sOptions["maxCacheEntries"] + ): + break + fetchedSymbols[(pdbName, pdbId)] = symbolInfo + fetchedCount += symbolInfo.GetEntryCount() + else: + LOG.error("Couldn't fetch .sym file symbols for " + symbolFilePath) + continue + + # Insert new symbols into global symbol cache + self.sCacheLock.acquire() + try: + # Make room for the new symbols + self.MaybeEvict(fetchedCount) + + for (pdbName, pdbId) in fetchedSymbols: + if pdbName not in self.sCache: + self.sCache[pdbName] = {} + + if pdbId in self.sCache[pdbName]: + continue + + newSymbolFile = fetchedSymbols[(pdbName, pdbId)] + self.sCache[pdbName][pdbId] = newSymbolFile + self.sCacheCount += newSymbolFile.GetEntryCount() + + # Move new symbols to front of MRU list to give them a chance + self.UpdateMruList(pdbName, pdbId) + + finally: + self.sCacheLock.release() + + LOG.info("Finished prefetching recent symbol files") + + def UpdateMruList(self, pdbName, pdbId): + 
libId = (pdbName, pdbId) + if libId in self.sMruSymbols: + self.sMruSymbols.remove(libId) + self.sMruSymbols.insert(0, libId) + + def MaybeEvict(self, freeEntriesNeeded): + maxCacheSize = self.sOptions["maxCacheEntries"] + LOG.debug( + "Cache occupancy before MaybeEvict: " + + str(self.sCacheCount) + + "/" + + str(maxCacheSize) + ) + + if ( + self.sCacheCount == 0 + or self.sCacheCount + freeEntriesNeeded <= maxCacheSize + ): + # No need to lock mutex here, this doesn't need to be 100% + return + + # If adding the new entries would exceed the max cache size, + # evict so that cache is at 70% capacity after new entries added + numOldEntriesAfterEvict = max(0, (0.70 * maxCacheSize) - freeEntriesNeeded) + numToEvict = self.sCacheCount - numOldEntriesAfterEvict + + # Evict symbols until evict quota is met, starting with least recently + # used + for (pdbName, pdbId) in reversed(self.sMruSymbols): + if numToEvict <= 0: + break + + evicteeCount = self.sCache[pdbName][pdbId].GetEntryCount() + + del self.sCache[pdbName][pdbId] + self.sCacheCount -= evicteeCount + self.sMruSymbols.pop() + + numToEvict -= evicteeCount + + LOG.debug( + "Cache occupancy after MaybeEvict: " + + str(self.sCacheCount) + + "/" + + str(maxCacheSize) + ) diff --git a/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/symbolication.py b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/symbolication.py new file mode 100644 index 0000000000..ecec5c1d9d --- /dev/null +++ b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/symbolication.py @@ -0,0 +1,360 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+import hashlib +import http.client +import os +import platform +import subprocess +import zipfile +from distutils import spawn + +import six +from mozlog import get_proxy_logger + +from .symbolicationRequest import SymbolicationRequest +from .symFileManager import SymFileManager + +LOG = get_proxy_logger("profiler") + +if six.PY2: + # Import for Python 2 + from cStringIO import StringIO as sio + from urllib2 import urlopen +else: + # Import for Python 3 + from io import BytesIO as sio + from urllib.request import urlopen + + # Symbolication is broken when using type 'str' in python 2.7, so we use 'basestring'. + # But for python 3.0 compatibility, 'basestring' isn't defined, but the 'str' type works. + # So we force 'basestring' to 'str'. + basestring = str + + +class SymbolError(Exception): + pass + + +class OSXSymbolDumper: + def __init__(self): + self.dump_syms_bin = os.path.join(os.path.dirname(__file__), "dump_syms_mac") + if not os.path.exists(self.dump_syms_bin): + raise SymbolError("No dump_syms_mac binary in this directory") + + def store_symbols( + self, lib_path, expected_breakpad_id, output_filename_without_extension + ): + """ + Returns the filename at which the .sym file was created, or None if no + symbols were dumped. + """ + output_filename = output_filename_without_extension + ".sym" + + def get_archs(filename): + """ + Find the list of architectures present in a Mach-O file. 
+ """ + return ( + subprocess.Popen(["lipo", "-info", filename], stdout=subprocess.PIPE) + .communicate()[0] + .split(b":")[2] + .strip() + .split() + ) + + def process_file(arch): + proc = subprocess.Popen( + [self.dump_syms_bin, "-a", arch, lib_path], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + stdout, stderr = proc.communicate() + if proc.returncode != 0: + return None + + module = stdout.splitlines()[0] + bits = module.split(b" ", 4) + if len(bits) != 5: + return None + _, platform, cpu_arch, actual_breakpad_id, debug_file = bits + + if str(actual_breakpad_id, "utf-8") != expected_breakpad_id: + return None + + with open(output_filename, "wb") as f: + f.write(stdout) + return output_filename + + for arch in get_archs(lib_path): + result = process_file(arch) + if result is not None: + return result + return None + + +class LinuxSymbolDumper: + def __init__(self): + self.nm = spawn.find_executable("nm") + if not self.nm: + raise SymbolError("Could not find nm, necessary for symbol dumping") + + def store_symbols(self, lib_path, breakpad_id, output_filename_without_extension): + """ + Returns the filename at which the .sym file was created, or None if no + symbols were dumped. + """ + output_filename = output_filename_without_extension + ".nmsym" + + proc = subprocess.Popen( + [self.nm, "--demangle", lib_path], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + stdout, stderr = proc.communicate() + + if proc.returncode != 0: + return + + with open(output_filename, "wb") as f: + f.write(stdout) + + # Append nm -D output to the file. On Linux, most system libraries + # have no "normal" symbols, but they have "dynamic" symbols, which + # nm -D shows. 
+ proc = subprocess.Popen( + [self.nm, "--demangle", "-D", lib_path], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + stdout, stderr = proc.communicate() + if proc.returncode == 0: + f.write(stdout) + return output_filename + + +class ProfileSymbolicator: + """This class orchestrates symbolication for a Gecko profile. + + It can be used by multiple pieces of testing infrastructure that generate Gecko + performance profiles. + + Args: + options (obj): See SymFileManager for details on these options. + """ + + def __init__(self, options): + self.options = options + self.sym_file_manager = SymFileManager(self.options) + self.symbol_dumper = self.get_symbol_dumper() + + def get_symbol_dumper(self): + try: + if platform.system() == "Darwin": + return OSXSymbolDumper() + elif platform.system() == "Linux": + return LinuxSymbolDumper() + except SymbolError: + return None + + def integrate_symbol_zip_from_url(self, symbol_zip_url): + if self.have_integrated(symbol_zip_url): + return + LOG.info( + "Retrieving symbol zip from {symbol_zip_url}...".format( + symbol_zip_url=symbol_zip_url + ) + ) + try: + io = urlopen(symbol_zip_url, None, 30) + with zipfile.ZipFile(sio(io.read())) as zf: + self.integrate_symbol_zip(zf) + self._create_file_if_not_exists(self._marker_file(symbol_zip_url)) + except (IOError, http.client.IncompleteRead): + LOG.info("Symbol zip request failed.") + + def integrate_symbol_zip_from_file(self, filename): + if self.have_integrated(filename): + return + with open(filename, "rb") as f: + with zipfile.ZipFile(f) as zf: + self.integrate_symbol_zip(zf) + self._create_file_if_not_exists(self._marker_file(filename)) + + def _create_file_if_not_exists(self, filename): + try: + os.makedirs(os.path.dirname(filename)) + except OSError: + pass + try: + open(filename, "a").close() + except IOError: + pass + + def integrate_symbol_zip(self, symbol_zip_file): + symbol_zip_file.extractall(self.options["symbolPaths"]["FIREFOX"]) + + def _marker_file(self, 
symbol_zip_url): + marker_dir = os.path.join(self.options["symbolPaths"]["FIREFOX"], ".markers") + return os.path.join( + marker_dir, hashlib.sha1(symbol_zip_url.encode("utf-8")).hexdigest() + ) + + def have_integrated(self, symbol_zip_url): + return os.path.isfile(self._marker_file(symbol_zip_url)) + + def get_unknown_modules_in_profile(self, profile_json): + if "libs" not in profile_json: + return [] + shared_libraries = profile_json["libs"] + memoryMap = [] + for lib in shared_libraries: + memoryMap.append([lib["debugName"], lib["breakpadId"]]) + + rawRequest = { + "stacks": [[]], + "memoryMap": memoryMap, + "version": 4, + "symbolSources": ["FIREFOX", "WINDOWS"], + } + request = SymbolicationRequest(self.sym_file_manager, rawRequest) + if not request.isValidRequest: + return [] + request.Symbolicate(0) # This sets request.knownModules + + unknown_modules = [] + for i, lib in enumerate(shared_libraries): + if not request.knownModules[i]: + unknown_modules.append(lib) + return unknown_modules + + def dump_and_integrate_missing_symbols(self, profile_json, symbol_zip_path): + if not self.symbol_dumper: + return + + unknown_modules = self.get_unknown_modules_in_profile(profile_json) + if not unknown_modules: + return + + # We integrate the dumped symbols by dumping them directly into our + # symbol directory. + output_dir = self.options["symbolPaths"]["FIREFOX"] + + # Additionally, we add all dumped symbol files to the missingsymbols + # zip file. 
+ with zipfile.ZipFile(symbol_zip_path, "a", zipfile.ZIP_DEFLATED) as zf: + for lib in unknown_modules: + self.dump_and_integrate_symbols_for_lib(lib, output_dir, zf) + + def dump_and_integrate_symbols_for_lib(self, lib, output_dir, zip): + name = lib["debugName"] + expected_name_without_extension = os.path.join(name, lib["breakpadId"], name) + for extension in [".sym", ".nmsym"]: + expected_name = expected_name_without_extension + extension + if expected_name in zip.namelist(): + # No need to dump the symbols again if we already have it in + # the missingsymbols zip file from a previous run. + zip.extract(expected_name, output_dir) + return + + lib_path = lib["path"] + if not os.path.exists(lib_path): + return + + output_filename_without_extension = os.path.join( + output_dir, expected_name_without_extension + ) + store_path = os.path.dirname(output_filename_without_extension) + if not os.path.exists(store_path): + os.makedirs(store_path) + + # Dump the symbols. + sym_file = self.symbol_dumper.store_symbols( + lib_path, lib["breakpadId"], output_filename_without_extension + ) + if sym_file: + rootlen = len(os.path.join(output_dir, "_")) - 1 + output_filename = sym_file[rootlen:] + if output_filename not in zip.namelist(): + zip.write(sym_file, output_filename) + + def symbolicate_profile(self, profile_json): + if "libs" not in profile_json: + return + + shared_libraries = profile_json["libs"] + addresses = self._find_addresses(profile_json) + symbols_to_resolve = self._assign_symbols_to_libraries( + addresses, shared_libraries + ) + symbolication_table = self._resolve_symbols(symbols_to_resolve) + self._substitute_symbols(profile_json, symbolication_table) + + for process in profile_json["processes"]: + self.symbolicate_profile(process) + + def _find_addresses(self, profile_json): + addresses = set() + for thread in profile_json["threads"]: + if isinstance(thread, basestring): + continue + for s in thread["stringTable"]: + if s[0:2] == "0x": + addresses.add(s) + 
return addresses + + def _substitute_symbols(self, profile_json, symbolication_table): + for thread in profile_json["threads"]: + if isinstance(thread, basestring): + continue + for i, s in enumerate(thread["stringTable"]): + thread["stringTable"][i] = symbolication_table.get(s, s) + + def _get_containing_library(self, address, libs): + left = 0 + right = len(libs) - 1 + while left <= right: + mid = (left + right) // 2 + if address >= libs[mid]["end"]: + left = mid + 1 + elif address < libs[mid]["start"]: + right = mid - 1 + else: + return libs[mid] + return None + + def _assign_symbols_to_libraries(self, addresses, shared_libraries): + libs_with_symbols = {} + for address in addresses: + lib = self._get_containing_library(int(address, 0), shared_libraries) + if not lib: + continue + if lib["start"] not in libs_with_symbols: + libs_with_symbols[lib["start"]] = {"library": lib, "symbols": set()} + libs_with_symbols[lib["start"]]["symbols"].add(address) + # pylint: disable=W1656 + return libs_with_symbols.values() + + def _resolve_symbols(self, symbols_to_resolve): + memoryMap = [] + processedStack = [] + all_symbols = [] + for moduleIndex, library_with_symbols in enumerate(symbols_to_resolve): + lib = library_with_symbols["library"] + symbols = library_with_symbols["symbols"] + memoryMap.append([lib["debugName"], lib["breakpadId"]]) + all_symbols += symbols + for symbol in symbols: + processedStack.append([moduleIndex, int(symbol, 0) - lib["start"]]) + + rawRequest = { + "stacks": [processedStack], + "memoryMap": memoryMap, + "version": 4, + "symbolSources": ["FIREFOX", "WINDOWS"], + } + request = SymbolicationRequest(self.sym_file_manager, rawRequest) + if not request.isValidRequest: + return {} + symbolicated_stack = request.Symbolicate(0) + return dict(zip(all_symbols, symbolicated_stack)) diff --git a/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/symbolicationRequest.py b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/symbolicationRequest.py new file mode 
100644 index 0000000000..bf8e0230d9 --- /dev/null +++ b/testing/mozbase/mozgeckoprofiler/mozgeckoprofiler/symbolicationRequest.py @@ -0,0 +1,317 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +import json +import re + +import six +from mozlog import get_proxy_logger + +LOG = get_proxy_logger("profiler") + +# Precompiled regex for validating lib names +# Empty lib name means client couldn't associate frame with any lib +gLibNameRE = re.compile("[0-9a-zA-Z_+\-\.]*$") + +# Maximum number of times a request can be forwarded to a different server +# for symbolication. Also prevents loops. +MAX_FORWARDED_REQUESTS = 3 + +if six.PY2: + # Import for Python 2 + from urllib2 import Request, urlopen +else: + # Import for Python 3 + from urllib.request import Request, urlopen + + # Symbolication is broken when using type 'str' in python 2.7, so we use 'basestring'. + # But for python 3.0 compatibility, 'basestring' isn't defined, but the 'str' type works. + # So we force 'basestring' to 'str'. 
+ basestring = str + + +class ModuleV3: + def __init__(self, libName, breakpadId): + self.libName = libName + self.breakpadId = breakpadId + + +def getModuleV3(libName, breakpadId): + if not isinstance(libName, basestring) or not gLibNameRE.match(libName): + LOG.debug("Bad library name: " + str(libName)) + return None + + if not isinstance(breakpadId, basestring): + LOG.debug("Bad breakpad id: " + str(breakpadId)) + return None + + return ModuleV3(libName, breakpadId) + + +class SymbolicationRequest: + def __init__(self, symFileManager, rawRequests): + self.Reset() + self.symFileManager = symFileManager + self.stacks = [] + self.combinedMemoryMap = [] + self.knownModules = [] + self.symbolSources = [] + self.ParseRequests(rawRequests) + + def Reset(self): + self.symFileManager = None + self.isValidRequest = False + self.combinedMemoryMap = [] + self.knownModules = [] + self.stacks = [] + self.forwardCount = 0 + + def ParseRequests(self, rawRequests): + self.isValidRequest = False + + try: + if not isinstance(rawRequests, dict): + LOG.debug("Request is not a dictionary") + return + + if "version" not in rawRequests: + LOG.debug("Request is missing 'version' field") + return + version = rawRequests["version"] + if version != 4: + LOG.debug("Invalid version: %s" % version) + return + + if "forwarded" in rawRequests: + if not isinstance(rawRequests["forwarded"], (int, int)): + LOG.debug("Invalid 'forwards' field: %s" % rawRequests["forwarded"]) + return + self.forwardCount = rawRequests["forwarded"] + + # Client specifies which sets of symbols should be used + if "symbolSources" in rawRequests: + try: + sourceList = [x.upper() for x in rawRequests["symbolSources"]] + for source in sourceList: + if source in self.symFileManager.sOptions["symbolPaths"]: + self.symbolSources.append(source) + else: + LOG.debug("Unrecognized symbol source: " + source) + continue + except Exception: + self.symbolSources = [] + pass + + if not self.symbolSources: + 
def ForwardRequest(self, indexes, stack, modules, symbolicatedStack):
    """Ask the remote symbol server to symbolicate unresolved stack entries.

    The request is posted as JSON to ``sOptions["remoteSymbolServer"]``.
    Version 4 of the request format is tried first; if that attempt raises,
    the same payload is retried once as version 3. On success the returned
    symbols are written into ``symbolicatedStack`` in place and, for a
    version 4 response, ``self.knownModules`` is updated for every module
    the server reports as known.

    Every failure is logged with ``LOG.error`` and swallowed; the method
    always returns ``None``.

    :param indexes: for each entry of ``stack``, the position in
        ``symbolicatedStack`` that its symbol must be written to.
    :param stack: list of ``[moduleIndex, offset]`` entries, where
        ``moduleIndex`` indexes ``self.combinedMemoryMap``.
    :param modules: iterable of ``(moduleIndex, module)`` pairs; each module
        provides ``libName`` and ``breakpadId`` and is used as a dict key.
    :param symbolicatedStack: list that is updated in place.
    """
    LOG.debug("Forwarding " + str(len(stack)) + " PCs for symbolication")

    try:
        url = self.symFileManager.sOptions["remoteSymbolServer"]

        # Renumber the modules densely for the forwarded request, and keep
        # the mapping back to indexes into self.combinedMemoryMap.
        rawModules = []
        moduleToIndex = {}
        newIndexToOldIndex = {}
        for moduleIndex, module in modules:
            newModuleIndex = len(rawModules)
            rawModules.append([module.libName, module.breakpadId])
            moduleToIndex[module] = newModuleIndex
            newIndexToOldIndex[newModuleIndex] = moduleIndex

        rawStack = []
        for entry in stack:
            moduleIndex = entry[0]
            offset = entry[1]
            module = self.combinedMemoryMap[moduleIndex]
            rawStack.append([moduleToIndex[module], offset])

        requestVersion = 4
        while True:
            requestObj = {
                "symbolSources": self.symbolSources,
                "stacks": [rawStack],
                "memoryMap": rawModules,
                "forwarded": self.forwardCount + 1,
                "version": requestVersion,
            }
            requestJson = json.dumps(requestObj).encode()
            headers = {"Content-Type": "application/json"}
            requestHandle = Request(url, requestJson, headers)
            try:
                response = urlopen(requestHandle)
            except Exception:
                if requestVersion == 4:
                    # Try again with version 3
                    requestVersion = 3
                    continue
                # Bare raise keeps the original traceback.
                raise
            succeededVersion = requestVersion
            break

    except Exception as e:
        LOG.error("Exception while forwarding request: " + str(e))
        return

    try:
        try:
            responseJson = json.loads(response.read())
        finally:
            # Release the connection whether or not the body was usable.
            response.close()
    except Exception as e:
        LOG.error(
            "Exception while reading server response to forwarded"
            " request: " + str(e)
        )
        return

    try:
        if succeededVersion == 4:
            responseKnownModules = responseJson["knownModules"]
            for newIndex, known in enumerate(responseKnownModules):
                if known and newIndex in newIndexToOldIndex:
                    self.knownModules[newIndexToOldIndex[newIndex]] = True

            responseSymbols = responseJson["symbolicatedStacks"][0]
        else:
            responseSymbols = responseJson[0]
        if len(responseSymbols) != len(stack):
            LOG.error(
                str(len(responseSymbols))
                + " symbols in response, "
                + str(len(stack))
                + " PCs in request!"
            )
            return

        for index, symbol in enumerate(responseSymbols):
            symbolicatedStack[indexes[index]] = symbol
    except Exception as e:
        LOG.error(
            "Exception while parsing server response to forwarded"
            " request: " + str(e)
        )
        return
class ProfileServingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Extends the basic SimpleHTTPRequestHandler (which serves a directory
    of files) to include response headers required by profiler.firefox.com"""

    def end_headers(self):
        # Let pages served from profiler.firefox.com fetch the profile
        # cross-origin.
        self.send_header("Access-Control-Allow-Origin", "https://profiler.firefox.com")
        SimpleHTTPRequestHandler.end_headers(self)


class ViewGeckoProfile(object):
    """Serve one local profile over HTTP and open it in profiler.firefox.com."""

    def __init__(self, gecko_profile_data_path):
        """
        :param gecko_profile_data_path: path to the profile file to serve.
        """
        self.gecko_profile_data_path = gecko_profile_data_path
        # Resolve the path first: dirname() of a bare file name is "", and
        # os.chdir("") raises.
        self.gecko_profile_dir = os.path.dirname(
            os.path.abspath(gecko_profile_data_path)
        )
        self.profiler_url = "https://profiler.firefox.com/from-url/"
        self.httpd = None
        self.host = "127.0.0.1"
        self.port = None
        self.oldcwd = os.getcwd()

    def setup_http_server(self):
        """Start listening on a free local port and chdir to the profile dir."""
        # Binding to port 0 lets the OS pick a free port for the server
        # socket itself, so no other process can grab it in between.
        self.httpd = TCPServer((self.host, 0), ProfileServingHTTPRequestHandler)
        self.port = self.httpd.server_address[1]

        # Temporarily change the directory to the profile directory.
        os.chdir(self.gecko_profile_dir)

    def _shutdown(self):
        """Close the server socket (if any) and restore the original cwd."""
        try:
            if self.httpd is not None:
                self.httpd.server_close()
        finally:
            # Go back to the old cwd, which some infrastructure may be
            # relying on.
            os.chdir(self.oldcwd)

    def handle_single_request(self):
        """Serve exactly one request, then release the socket and the cwd."""
        try:
            self.httpd.handle_request()
        finally:
            self._shutdown()

    def encode_url(self):
        """Append the percent-encoded local file URL to ``self.profiler_url``."""
        # Encode url i.e.: https://profiler.firefox.com/from-url/http...
        file_url = "http://{}:{}/{}".format(
            self.host, self.port, os.path.basename(self.gecko_profile_data_path)
        )

        self.profiler_url = self.profiler_url + quote(file_url, safe="")
        LOG.info("Temporarily serving the profile from: %s" % file_url)

    def open_profile_in_browser(self):
        """Open ``self.profiler_url`` in a new browser tab."""
        # Open the file in the user's preferred browser.
        LOG.info("Opening the profile: %s" % self.profiler_url)
        webbrowser.open_new_tab(self.profiler_url)


def create_parser(mach_interface=False):
    """Build the command-line parser.

    :param mach_interface: accepted for compatibility; not used here.
    :returns: an ``argparse.ArgumentParser`` with the ``--profile-zip``
        option and the logging option group added.
    """
    parser = argparse.ArgumentParser()
    add_arg = parser.add_argument

    add_arg(
        "-p",
        "--profile-zip",
        required=True,
        dest="profile_zip",
        help="path to the gecko profiles zip file to open in profiler.firefox.com",
    )

    add_logging_group(parser)
    return parser


def verify_options(parser, args):
    """Report a parser error if ``args.profile_zip`` is not an existing file."""
    ctx = vars(args)

    if not os.path.isfile(args.profile_zip):
        parser.error("{profile_zip} does not exist!".format(**ctx))


def parse_args(argv=None):
    """Parse and validate ``argv``, returning the resulting namespace."""
    parser = create_parser()
    args = parser.parse_args(argv)
    verify_options(parser, args)
    return args


def view_gecko_profile(profile_path):
    """
    Open a gecko profile in the user's default browser. This function opens
    up a special URL to profiler.firefox.com and serves up the local profile.
    """
    # Named differently from the function so the function is not shadowed.
    viewer = ViewGeckoProfile(profile_path)

    viewer.setup_http_server()
    try:
        viewer.encode_url()
        viewer.open_profile_in_browser()
    except Exception:
        # The server never got to serve; undo setup_http_server() here.
        viewer._shutdown()
        raise
    viewer.handle_single_request()


def start_from_command_line():
    """Entry point: parse ``sys.argv``, set up logging, and view the profile."""
    args = parse_args(sys.argv[1:])
    commandline.setup_logging("view-gecko-profile", args, {"tbpl": sys.stdout})

    view_gecko_profile(args.profile_zip)


if __name__ == "__main__":
    start_from_command_line()
def access_profiler_link(file_url, response):
    """Attempts to access the profile in a loop for 5 seconds.

    This is run from a separate thread. On success the response text is
    stored in ``response[0]``; otherwise a timeout message is stored there.
    """
    timeout = 5  # seconds
    start = time.time()

    while time.time() - start < timeout:
        # Poll the server to try and get a response. A failed connection
        # must not kill the thread; it is simply retried until the deadline.
        try:
            result = requests.get(url=file_url, timeout=timeout)
        except requests.exceptions.RequestException:
            result = None
        if result is not None and result.ok:
            # Return the text back in a list.
            response[0] = result.text
            return
        time.sleep(0.1)

    response[0] = "Accessing the profiler link timed out after %s seconds" % timeout


class TestViewGeckoProfile(unittest.TestCase):
    """Tests the opening local profiles in the Firefox Profiler."""

    def setUp(self):
        self.firefox_profiler_url = None
        self.thread = None
        self.response = [None]

    def test_view_gecko_profile(self):

        # Create a temporary fake performance profile. Register the cleanup
        # right away so the directory is removed even if an assertion fails.
        temp_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, temp_dir)
        profile_path = os.path.join(temp_dir, "fakeprofile.json")
        with io.open(profile_path, "w") as f:
            f.write(u"FAKE_PROFILE")

        # Mock the open_new_tab function so that we know when the view_gecko_profile
        # function has done all of its work, and we can assert the result of the
        # user behavior.
        def mocked_open_new_tab(firefox_profiler_url):
            self.firefox_profiler_url = firefox_profiler_url
            encoded_file_url = firefox_profiler_url.split("/")[-1]
            decoded_file_url = unquote(encoded_file_url)
            # Extract the actual file from the path.
            self.thread = threading.Thread(
                target=access_profiler_link, args=(decoded_file_url, self.response)
            )
            print("firefox_profiler_url %s" % firefox_profiler_url)
            print("encoded_file_url %s" % encoded_file_url)
            print("decoded_file_url %s" % decoded_file_url)
            self.thread.start()

        with mock.patch("webbrowser.open_new_tab", new=mocked_open_new_tab):
            # Run the test
            view_gecko_profile(profile_path)

        self.thread.join()

        # Compare the URLs, but replace the PORT value supplied, as that is dynamic.
        expected_url = (
            "https://profiler.firefox.com/from-url/"
            "http%3A%2F%2F127.0.0.1%3A{PORT}%2Ffakeprofile.json"
        )
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        actual_url = re.sub(r"%3A\d+%2F", "%3A{PORT}%2F", self.firefox_profiler_url)

        self.assertEqual(
            actual_url,
            expected_url,
            "The URL generated was correct for the Firefox Profiler.",
        )
        self.assertEqual(
            self.response[0],
            "FAKE_PROFILE",
            "The response from the serve provided the profile contents.",
        )


if __name__ == "__main__":
    mozunit.main()