summaryrefslogtreecommitdiffstats
path: root/testing/talos/talos/xtalos/etlparser.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--testing/talos/talos/xtalos/etlparser.py640
1 files changed, 640 insertions, 0 deletions
diff --git a/testing/talos/talos/xtalos/etlparser.py b/testing/talos/talos/xtalos/etlparser.py
new file mode 100644
index 0000000000..10fa5c64d3
--- /dev/null
+++ b/testing/talos/talos/xtalos/etlparser.py
@@ -0,0 +1,640 @@
+#!/usr/bin/env python
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+import csv
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+
+from talos.xtalos import xtalos
+
+EVENTNAME_INDEX = 0
+PROCESS_INDEX = 2
+THREAD_ID_INDEX = 3
+PARENT_PID_INDEX = 3
+DISKBYTES_COL = "Size"
+FNAME_COL = "FileName"
+IMAGEFUNC_COL = "Image!Function"
+EVENTGUID_COL = "EventGuid"
+ACTIVITY_ID_COL = "etw:ActivityId"
+NUMBYTES_COL = "NumBytes"
+BYTESSENT_COL = "BytesSent"
+
+CEVT_WINDOWS_RESTORED = "{917b96b1-ecad-4dab-a760-8d49027748ae}"
+CEVT_XPCOM_SHUTDOWN = "{26d1e091-0ae7-4f49-a554-4214445c505c}"
+NAME_SUBSTITUTIONS = [
+ # Careful with your regex!
+ # Substitution happens after combinations like \t \s \n ... are replaced
+ # with their real representations. So, prepend them with extra backslash.
+ # Read more: https://docs.python.org/2.7/library/re.html#re.sub
+ (re.compile(r"{\w{8}-\w{4}-\w{4}-\w{4}-\w{12}}"), "{uuid}"),
+ (re.compile(r"talos\\tests\\tp5n\\.*"), r"talos\\tests\{tp5n_files}"),
+ (re.compile(r"nvidia corporation\\3d vision\\.*"), "{nvidia_3d_vision}"),
+ (re.compile(r"cltbld\.t-w732-ix-\d+\.\d+"), "{cltbld}"),
+ (
+ re.compile(r"venv\\lib\\site-packages\\pip\\_vendor\\.*"),
+ r"venv\\lib\\site-packages\\{pip_vendor}",
+ ),
+]
+stages = ["startup", "normal", "shutdown"]
+net_events = {
+ "TcpDataTransferReceive": "recv",
+ "UdpEndpointReceiveMessages": "recv",
+ "TcpDataTransferSend": "send",
+ "UdpEndpointSendMessages": "send",
+}
+gThreads = {}
+gConnectionIDs = {}
+gHeaders = {}
+gBrowserPID = None
+
+
+def uploadFile(filename):
+ mud = os.environ.get("MOZ_UPLOAD_DIR", None)
+ if mud:
+ print("uploading raw file %s via blobber" % filename)
+ mud_filename = os.path.join(mud, filename)
+ shutil.copyfile(filename, "%s.log" % mud_filename)
+
+
+def filterOutHeader(data):
+ # -1 means we have not yet found the header
+ # 0 means we are in the header
+ # 1+ means that we are past the header
+ state = -1
+ done = False
+ while not done:
+ try:
+ row = next(data)
+ except StopIteration:
+ done = True
+ break
+ except UnicodeDecodeError:
+ continue
+ except csv.Error:
+ continue
+
+ if not len(row):
+ continue
+
+ if state < 0:
+ # Keep looking for the header (denoted by "BeginHeader").
+ if row[0] == "BeginHeader":
+ state = 0
+ continue
+
+ if state == 0:
+ # Eventually, we'll find the end (denoted by "EndHeader").
+ if row[0] == "EndHeader":
+ state = 1
+ continue
+
+ gHeaders[row[EVENTNAME_INDEX]] = row
+ continue
+
+ if state >= 1:
+ state = state + 1
+
+ # The line after "EndHeader" is also not useful, so we want to strip
+ # that in addition to the header.
+ if state > 2:
+ yield row
+
+
+def getIndex(eventName, *colNames):
+ eventHeader = gHeaders[eventName]
+
+ for colName in colNames:
+ try:
+ return eventHeader.index(colName)
+ except ValueError:
+ pass
+
+ return None
+
+
+def readFile(filename):
+ data = csv.reader(
+ open(filename, "r", encoding="cp1252"),
+ delimiter=",",
+ quotechar='"',
+ skipinitialspace=True,
+ )
+ data = filterOutHeader(data)
+ return data
+
+
+def fileSummary(row, stage, retVal):
+ event = row[EVENTNAME_INDEX]
+
+ # TODO: do we care about the other events?
+ if event not in ("FileIoRead", "FileIoWrite"):
+ return
+ fname_index = getIndex(event, FNAME_COL)
+
+ # We only care about events that have a file name.
+ if fname_index is None:
+ return
+
+ # Some data rows are missing the filename?
+ if len(row) <= fname_index:
+ return
+
+ thread_extra = ""
+ if gThreads[row[THREAD_ID_INDEX]] == "main":
+ thread_extra = " (main)"
+ key_tuple = (
+ row[fname_index],
+ "%s%s" % (row[THREAD_ID_INDEX], thread_extra),
+ stages[stage],
+ )
+ total_tuple = (
+ row[fname_index],
+ "%s%s" % (row[THREAD_ID_INDEX], thread_extra),
+ "all",
+ )
+
+ if key_tuple not in retVal:
+ retVal[key_tuple] = {
+ "DiskReadBytes": 0,
+ "DiskReadCount": 0,
+ "DiskWriteBytes": 0,
+ "DiskWriteCount": 0,
+ }
+ retVal[total_tuple] = retVal[key_tuple]
+
+ if event == "FileIoRead":
+ retVal[key_tuple]["DiskReadCount"] += 1
+ retVal[total_tuple]["DiskReadCount"] += 1
+ idx = getIndex(event, DISKBYTES_COL)
+ retVal[key_tuple]["DiskReadBytes"] += int(row[idx], 16)
+ retVal[total_tuple]["DiskReadBytes"] += int(row[idx], 16)
+ elif event == "FileIoWrite":
+ retVal[key_tuple]["DiskWriteCount"] += 1
+ retVal[total_tuple]["DiskWriteCount"] += 1
+ idx = getIndex(event, DISKBYTES_COL)
+ retVal[key_tuple]["DiskWriteBytes"] += int(row[idx], 16)
+ retVal[total_tuple]["DiskWriteBytes"] += int(row[idx], 16)
+
+
+def etl2csv(xperf_path, etl_filename, debug=False):
+ """
+ Convert etl_filename to etl_filename.csv (temp file) which is the .csv
+ representation of the .etl file Etlparser will read this .csv and parse
+ the information we care about into the final output. This is done to keep
+ things simple and to preserve resources on talos machines
+ (large files == high memory + cpu)
+ """
+
+ xperf_cmd = [
+ xperf_path,
+ "-merge",
+ "%s.user" % etl_filename,
+ "%s.kernel" % etl_filename,
+ etl_filename,
+ ]
+ if debug:
+ print("executing '%s'" % subprocess.list2cmdline(xperf_cmd))
+ subprocess.call(xperf_cmd)
+
+ csv_filename = "%s.csv" % etl_filename
+ xperf_cmd = [xperf_path, "-i", etl_filename, "-o", csv_filename]
+ if debug:
+ print("executing '%s'" % subprocess.list2cmdline(xperf_cmd))
+ subprocess.call(xperf_cmd)
+ return csv_filename
+
+
+def trackProcess(row, firstFirefoxPID):
+ global gBrowserPID
+ if gBrowserPID:
+ return
+
+ # Without the launcher, the initial Firefox process *is* the browser
+ # process. OTOH, with the launcher process enabled, the browser is actually
+ # the first child process of the first Firefox process.
+ parentPID = int(row[PARENT_PID_INDEX])
+ if parentPID == firstFirefoxPID:
+ proc = row[PROCESS_INDEX]
+ gBrowserPID = int(re.search("^.* \(\s*(\d+)\)$", proc).group(1))
+
+
+def getBrowserPID():
+ global gBrowserPID
+ return gBrowserPID
+
+
+def trackThread(row, browserPID):
+ event, proc, tid = row[EVENTNAME_INDEX], row[PROCESS_INDEX], row[THREAD_ID_INDEX]
+ if event in ["T-DCStart", "T-Start"]:
+ procName, procID = re.search("^(.*) \(\s*(\d+)\)$", proc).group(1, 2)
+ if procID == str(browserPID):
+ imgIdx = getIndex(event, IMAGEFUNC_COL)
+ img = re.match("([^!]+)!", row[imgIdx]).group(1)
+ if img == procName:
+ gThreads[tid] = "main"
+ else:
+ gThreads[tid] = "nonmain"
+ elif event in ["T-DCEnd", "T-End"] and tid in gThreads:
+ del gThreads[tid]
+
+
+def trackThreadFileIO(row, io, stage):
+ event, tid = row[EVENTNAME_INDEX], row[THREAD_ID_INDEX]
+ opType = {"FileIoWrite": "write", "FileIoRead": "read"}[event]
+ th, stg = gThreads[tid], stages[stage]
+ sizeIdx = getIndex(event, DISKBYTES_COL)
+ bytes = int(row[sizeIdx], 16)
+ io[(th, stg, "file_%s_ops" % opType)] = (
+ io.get((th, stg, "file_%s_ops" % opType), 0) + 1
+ )
+ io[(th, stg, "file_%s_bytes" % opType)] = (
+ io.get((th, stg, "file_%s_bytes" % opType), 0) + bytes
+ )
+ io[(th, stg, "file_io_bytes")] = io.get((th, stg, "file_io_bytes"), 0) + bytes
+
+
+def trackThreadNetIO(row, io, stage):
+ event, tid = row[EVENTNAME_INDEX], row[THREAD_ID_INDEX]
+ connIdIdx = getIndex(event, ACTIVITY_ID_COL)
+ connID = row[connIdIdx]
+ if connID not in gConnectionIDs:
+ gConnectionIDs[connID] = tid
+ origThread = gConnectionIDs[connID]
+ if origThread in gThreads:
+ match = re.match("[\w-]+\/([\w-]+)?", event)
+ if not match:
+ raise xtalos.XTalosError(
+ "Could not find a regular expression match for event: {}".format(event)
+ )
+ netEvt = match.group(1)
+
+ if netEvt in net_events:
+ opType = net_events[netEvt]
+ th, stg = gThreads[origThread], stages[stage]
+ # On newer versions of Windows, some net I/O events have switched to
+ # using BYTESSENT_COL, so we try both
+ lenIdx = getIndex(event, NUMBYTES_COL, BYTESSENT_COL)
+ bytes = int(row[lenIdx])
+ io[(th, stg, "net_%s_bytes" % opType)] = (
+ io.get((th, stg, "net_%s_bytes" % opType), 0) + bytes
+ )
+ io[(th, stg, "net_io_bytes")] = io.get((th, stg, "net_io_bytes"), 0) + bytes
+
+
+def updateStage(row, stage):
+ guidIdx = getIndex(row[EVENTNAME_INDEX], EVENTGUID_COL)
+ if row[guidIdx] == CEVT_WINDOWS_RESTORED and stage == 0:
+ stage = 1
+ elif row[guidIdx] == CEVT_XPCOM_SHUTDOWN and stage == 1:
+ stage = 2
+ return stage
+
+
+def loadAllowlist(filename):
+ if not filename:
+ return
+ if not os.path.exists(filename):
+ print("Warning: xperf allowlist %s was not found" % filename)
+ return
+ lines = open(filename).readlines()
+ # Expand paths
+ lines = [os.path.expandvars(elem.strip()) for elem in lines]
+ files = set()
+ dirs = set()
+ recur = set()
+ for line in lines:
+ if line.startswith("#"):
+ continue
+ elif line.endswith("\\*\\*"):
+ recur.add(line[:-4])
+ elif line.endswith("\\*"):
+ dirs.add(line[:-2])
+ else:
+ files.add(line)
+ return (files, dirs, recur)
+
+
+def checkAllowlist(filename, allowlist):
+ if not allowlist:
+ return False
+ if filename in allowlist[0]:
+ return True
+ if os.path.dirname(filename) in allowlist[1]:
+ return True
+ head = filename
+ while len(head) > 3: # Length 3 implies root directory, e.g. C:\
+ head, tail = os.path.split(head)
+ if head in allowlist[2]:
+ return True
+ return False
+
+
+def etlparser(
+ xperf_path,
+ etl_filename,
+ processID,
+ approot=None,
+ configFile=None,
+ outputFile=None,
+ allowlist_file=None,
+ error_filename=None,
+ all_stages=False,
+ all_threads=False,
+ debug=False,
+):
+ global NAME_SUBSTITUTIONS
+
+ # setup output file
+ if outputFile:
+ outFile = open(outputFile, "w")
+ else:
+ outFile = sys.stdout
+
+ files = {}
+ io = {}
+ stage = 0
+
+ csvname = etl2csv(xperf_path, etl_filename, debug=debug)
+ for row in readFile(csvname):
+ event = row[EVENTNAME_INDEX]
+ if event == "P-Start":
+ trackProcess(row, processID)
+ elif event in ["T-DCStart", "T-Start", "T-DCEnd", "T-End"]:
+ trackThread(row, getBrowserPID())
+ elif (
+ event in ["FileIoRead", "FileIoWrite"] and row[THREAD_ID_INDEX] in gThreads
+ ):
+ fileSummary(row, stage, files)
+ trackThreadFileIO(row, io, stage)
+ elif event.endswith("Event/Classic") and row[THREAD_ID_INDEX] in gThreads:
+ stage = updateStage(row, stage)
+ elif event.startswith("Microsoft-Windows-TCPIP"):
+ trackThreadNetIO(row, io, stage)
+
+ if debug:
+ uploadFile(csvname)
+
+ output = "thread, stage, counter, value\n"
+ for cntr in sorted(io.keys()):
+ output += "%s, %s\n" % (", ".join(cntr), str(io[cntr]))
+ if outputFile:
+ fname = "%s_thread_stats%s" % os.path.splitext(outputFile)
+ with open(fname, "w") as f:
+ f.write(output)
+
+ if debug:
+ uploadFile(fname)
+ else:
+ print(output)
+
+ allowlist = loadAllowlist(allowlist_file)
+
+ header = "filename, tid, stage, readcount, readbytes, writecount," " writebytes"
+ outFile.write(header + "\n")
+
+ # Filter out stages, threads, and allowlisted files that we're not
+ # interested in
+ filekeys = [
+ x
+ for x in files.keys()
+ if (all_stages or x[2] == stages[0])
+ and (all_threads or x[1].endswith("(main)"))
+ and (all_stages and x[2] != stages[0] or not checkAllowlist(x[0], allowlist))
+ ]
+ if debug:
+ # in debug, we want stages = [startup+normal] and all threads, not just (main)
+ # we will use this data to upload fileIO info to blobber only for debug mode
+ outputData = [
+ x
+ for x in files.keys()
+ if (all_stages or x[2] in [stages[0], stages[1]])
+ and (
+ all_stages
+ and x[2] not in [stages[0], stages[1]]
+ or not checkAllowlist(x[0], allowlist)
+ )
+ ]
+ else:
+ outputData = filekeys
+
+ # output data
+ for row in outputData:
+ output = "%s, %s, %s, %s, %s, %s, %s\n" % (
+ row[0],
+ row[1],
+ row[2],
+ files[row]["DiskReadCount"],
+ files[row]["DiskReadBytes"],
+ files[row]["DiskWriteCount"],
+ files[row]["DiskWriteBytes"],
+ )
+ outFile.write(output)
+
+ if outputFile:
+ # close the file handle
+ outFile.close()
+ if debug:
+ uploadFile(outputFile)
+
+ # We still like to have the outputfile to record the raw data, now
+ # filter out acceptable files/ranges
+ allowlist_path = None
+ wl_temp = {}
+ dirname = os.path.dirname(__file__)
+ if os.path.exists(os.path.join(dirname, "xperf_allowlist.json")):
+ allowlist_path = os.path.join(dirname, "xperf_allowlist.json")
+ elif os.path.exists(os.path.join(dirname, "xtalos")) and os.path.exists(
+ os.path.join(dirname, "xtalos", "xperf_allowlist.json")
+ ):
+ allowlist_path = os.path.join(dirname, "xtalos", "xperf_allowlist.json")
+
+ wl_temp = {}
+ if allowlist_path:
+ with open(allowlist_path, "r") as fHandle:
+ wl_temp = json.load(fHandle)
+
+ # Approot is the full path where the application is located at
+ # We depend on it for dependentlibs.list to ignore files required for
+ # normal startup.
+ if approot:
+ if os.path.exists("%s\\dependentlibs.list" % approot):
+ with open("%s\\dependentlibs.list" % approot, "r") as fhandle:
+ libs = fhandle.readlines()
+
+ for lib in libs:
+ wl_temp["{firefox}\\%s" % lib.strip()] = {"ignore": True}
+
+ # Windows isn't case sensitive, this protects us against mismatched
+ # systems.
+ wl = {}
+ for item in wl_temp:
+ wl[item.lower()] = wl_temp[item]
+
+ errors = []
+ for row in filekeys:
+ filename = original_filename = row[0]
+ filename = filename.lower()
+ # take care of 'program files (x86)' matching 'program files'
+ filename = filename.replace(" (x86)", "")
+
+ paths = ["profile", "firefox", "desktop", "talos"]
+ for path in paths:
+ pathname = "%s\\" % path
+ parts = filename.split(pathname)
+ if len(parts) >= 2:
+ filename = "{%s}\\%s" % (path, pathname.join(parts[1:]))
+
+ parts = filename.split("\\installtime")
+ if len(parts) >= 2:
+ filename = "%s\\{time}" % parts[0]
+
+ # NOTE: this is Prefetch or prefetch, not case sensitive operating
+ # system
+ parts = filename.split("refetch")
+ if len(parts) >= 2:
+ filename = "%srefetch\\{prefetch}.pf" % parts[0]
+
+ for pattern, substitution in NAME_SUBSTITUTIONS:
+ filename = re.sub(pattern, substitution, filename)
+
+ if filename in wl:
+ if "ignore" in wl[filename] and wl[filename]["ignore"]:
+ continue
+
+ # too noisy
+ # if wl[filename]['minbytes'] > (files[row]['DiskReadBytes'] +\
+ # files[row]['DiskWriteBytes']):
+ # print "%s: read %s bytes, less than expected minimum: %s"
+ # % (filename, (files[row]['DiskReadBytes'] +
+ # files[row]['DiskWriteBytes']),
+ # wl[filename]['minbytes'])
+
+ # don't report in first round
+ # elif wl[filename]['maxbytes'] < (files[row]['DiskReadBytes'] +\
+ # files[row]['DiskWriteBytes']):
+ # errors.append("%s: read %s bytes, more than expected maximum:"
+ # " %s"
+ # % (filename, (files[row]['DiskReadBytes'] +
+ # files[row]['DiskWriteBytes']),
+ # wl[filename]['maxbytes']))
+
+ # too noisy
+ # elif wl[filename]['mincount'] > (files[row]['DiskReadCount'] +\
+ # files[row]['DiskWriteCount']):
+ # print "%s: %s accesses, less than expected minimum: %s"
+ # % (filename, (files[row]['DiskReadCount'] +
+ # files[row]['DiskWriteCount']),
+ # wl[filename]['mincount'])
+
+ # don't report in first round
+ # elif wl[filename]['maxcount'] < (files[row]['DiskReadCount'] +\
+ # files[row]['DiskWriteCount']):
+ # errors.append("%s: %s accesses, more than expected maximum:"
+ # " %s"
+ # % (filename, (files[row]['DiskReadCount'] +
+ # files[row]['DiskWriteCount']),
+ # wl[filename]['maxcount']))
+ else:
+ errors.append(
+ "File '%s' (normalized from '%s') was accessed and we were not expecting"
+ " it. DiskReadCount: %s, DiskWriteCount: %s,"
+ " DiskReadBytes: %s, DiskWriteBytes: %s"
+ % (
+ filename,
+ original_filename,
+ files[row]["DiskReadCount"],
+ files[row]["DiskWriteCount"],
+ files[row]["DiskReadBytes"],
+ files[row]["DiskWriteBytes"],
+ )
+ )
+
+ if errors:
+ # output specific errors to be picked up by tbpl parser
+ for error in errors:
+ # NOTE: the ' :' is intentional, without the space before the :,
+ # some parser will translate this
+ print("TEST-UNEXPECTED-FAIL : xperf: %s" % error)
+
+ # We detect if browser_failures.txt exists to exit and turn the job
+ # orange
+ if error_filename:
+ with open(error_filename, "w") as errorFile:
+ errorFile.write("\n".join(errors))
+
+ if debug:
+ uploadFile(etl_filename)
+
+
+def etlparser_from_config(config_file, **kwargs):
+ """start from a YAML config file"""
+
+ # option defaults
+ args = {
+ "xperf_path": "xperf.exe",
+ "etl_filename": "test.etl",
+ "outputFile": "etl_output.csv",
+ "processID": None,
+ "approot": None,
+ "allowlist_file": None,
+ "error_filename": None,
+ "all_stages": False,
+ "all_threads": False,
+ }
+ args.update(kwargs)
+
+ # override from YAML config file
+ args = xtalos.options_from_config(args, config_file)
+
+ # ensure process ID is given
+ if not args.get("processID"):
+ raise xtalos.XTalosError("No process ID option given")
+
+ # ensure path to xperf given
+ if not os.path.exists(args["xperf_path"]):
+ raise xtalos.XTalosError(
+ "ERROR: xperf_path '%s' does not exist" % args["xperf_path"]
+ )
+
+ # update args with config file
+ args["configFile"] = config_file
+
+ # call etlparser
+ etlparser(**args)
+
+
+def main(args=sys.argv[1:]):
+
+ # parse command line arguments
+ parser = xtalos.XtalosOptions()
+ args = parser.parse_args(args)
+ args = parser.verifyOptions(args)
+ if args is None:
+ parser.error("Unable to verify arguments")
+ if not args.processID:
+ parser.error("No process ID argument given")
+
+ # call API
+ etlparser(
+ args.xperf_path,
+ args.etl_filename,
+ args.processID,
+ args.approot,
+ args.configFile,
+ args.outputFile,
+ args.allowlist_file,
+ args.error_filename,
+ args.all_stages,
+ args.all_threads,
+ debug=args.debug_level >= xtalos.DEBUG_INFO,
+ )
+
+
+if __name__ == "__main__":
+ main()