From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Fri, 19 Apr 2024 02:47:55 +0200
Subject: Adding upstream version 124.0.1.

Signed-off-by: Daniel Baumann
---
 testing/mozbase/mozsystemmonitor/README.rst        |   12 +
 .../mozsystemmonitor/mozsystemmonitor/__init__.py  |    0
 .../mozsystemmonitor/resourcemonitor.py            | 1220 ++++++++++++++++++++
 testing/mozbase/mozsystemmonitor/setup.cfg         |    2 +
 testing/mozbase/mozsystemmonitor/setup.py          |   33 +
 .../mozbase/mozsystemmonitor/tests/manifest.toml   |    4 +
 .../tests/test_resource_monitor.py                 |  183 +++
 7 files changed, 1454 insertions(+)
 create mode 100644 testing/mozbase/mozsystemmonitor/README.rst
 create mode 100644 testing/mozbase/mozsystemmonitor/mozsystemmonitor/__init__.py
 create mode 100644 testing/mozbase/mozsystemmonitor/mozsystemmonitor/resourcemonitor.py
 create mode 100644 testing/mozbase/mozsystemmonitor/setup.cfg
 create mode 100644 testing/mozbase/mozsystemmonitor/setup.py
 create mode 100644 testing/mozbase/mozsystemmonitor/tests/manifest.toml
 create mode 100644 testing/mozbase/mozsystemmonitor/tests/test_resource_monitor.py

diff --git a/testing/mozbase/mozsystemmonitor/README.rst b/testing/mozbase/mozsystemmonitor/README.rst
new file mode 100644
index 0000000000..c8dad37671
--- /dev/null
+++ b/testing/mozbase/mozsystemmonitor/README.rst
@@ -0,0 +1,12 @@
+================
+mozsystemmonitor
+================
+
+mozsystemmonitor contains modules for monitoring a running system.
+
+SystemResourceMonitor
+=====================
+
+mozsystemmonitor.resourcemonitor.SystemResourceMonitor is a class used to
+measure system resource usage. It is useful to get a handle on what an
+overall system is doing.
diff --git a/testing/mozbase/mozsystemmonitor/mozsystemmonitor/__init__.py b/testing/mozbase/mozsystemmonitor/mozsystemmonitor/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/testing/mozbase/mozsystemmonitor/mozsystemmonitor/resourcemonitor.py b/testing/mozbase/mozsystemmonitor/mozsystemmonitor/resourcemonitor.py
new file mode 100644
index 0000000000..978d7d6911
--- /dev/null
+++ b/testing/mozbase/mozsystemmonitor/mozsystemmonitor/resourcemonitor.py
@@ -0,0 +1,1220 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
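The README above only names the class; as a quick orientation, here is a minimal
usage sketch based on the SystemResourceMonitor docstring and the
aggregate_cpu_percent() API further down in this patch. The poll interval, phase
name and event name are arbitrary illustrative choices, not part of the module:

    import time

    from mozsystemmonitor.resourcemonitor import SystemResourceMonitor

    monitor = SystemResourceMonitor(poll_interval=0.5)  # sample twice per second
    monitor.start()

    with monitor.phase("busy_phase"):  # a named span with a duration
        time.sleep(2)  # stand-in for real work

    monitor.record_event("work_done")  # a point-in-time event
    monitor.stop()  # data only becomes available after stop()

    # Mean CPU utilization across all cores during the phase, as a float percent.
    print(monitor.aggregate_cpu_percent(phase="busy_phase", per_cpu=False))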
+
+import multiprocessing
+import sys
+import time
+import warnings
+from collections import OrderedDict, namedtuple
+from contextlib import contextmanager
+
+
+class PsutilStub(object):
+    def __init__(self):
+        self.sswap = namedtuple(
+            "sswap", ["total", "used", "free", "percent", "sin", "sout"]
+        )
+        self.sdiskio = namedtuple(
+            "sdiskio",
+            [
+                "read_count",
+                "write_count",
+                "read_bytes",
+                "write_bytes",
+                "read_time",
+                "write_time",
+            ],
+        )
+        self.pcputimes = namedtuple("pcputimes", ["user", "system"])
+        self.svmem = namedtuple(
+            "svmem",
+            [
+                "total",
+                "available",
+                "percent",
+                "used",
+                "free",
+                "active",
+                "inactive",
+                "buffers",
+                "cached",
+            ],
+        )
+
+    def cpu_count(self, logical=True):
+        return 0
+
+    def cpu_percent(self, a, b):
+        return [0]
+
+    def cpu_times(self, percpu):
+        if percpu:
+            return [self.pcputimes(0, 0)]
+        else:
+            return self.pcputimes(0, 0)
+
+    def disk_io_counters(self):
+        return self.sdiskio(0, 0, 0, 0, 0, 0)
+
+    def swap_memory(self):
+        return self.sswap(0, 0, 0, 0, 0, 0)
+
+    def virtual_memory(self):
+        return self.svmem(0, 0, 0, 0, 0, 0, 0, 0, 0)
+
+
+# psutil will raise NotImplementedError if the platform is not supported.
+try:
+    import psutil
+
+    have_psutil = True
+except Exception:
+    try:
+        # The PsutilStub should get us time intervals, at least
+        psutil = PsutilStub()
+    except Exception:
+        psutil = None
+
+    have_psutil = False
+
+
+def get_disk_io_counters():
+    try:
+        io_counters = psutil.disk_io_counters()
+
+        if io_counters is None:
+            return PsutilStub().disk_io_counters()
+    except RuntimeError:
+        io_counters = PsutilStub().disk_io_counters()
+
+    return io_counters
+
+
+def _poll(pipe, poll_interval=0.1):
+    """Wrap multiprocessing.Pipe.poll to hide POLLERR and POLLIN
+    exceptions.
+
+    multiprocessing.Pipe is not actually a pipe on at least Linux.
+    That has an effect on the expected outcome of reading from it when
+    the other end of the pipe dies, leading to possibly hanging on recv()
+    below.
+    """
+    try:
+        return pipe.poll(poll_interval)
+    except Exception:
+        # Poll might throw an exception even though there's still
+        # data to read. That happens when the underlying system call
+        # returns both POLLERR and POLLIN, but python doesn't tell us
+        # about it. So assume there is something to read, and we'll
+        # get an exception when trying to read the data.
+        return True
+
+
+def _collect(pipe, poll_interval):
+    """Collects system metrics.
+
+    This is the main function for the background process. It collects
+    data then forwards it on a pipe until told to stop.
+    """
+
+    data = []
+
+    try:
+        # Establish initial values.
+
+        io_last = get_disk_io_counters()
+        swap_last = psutil.swap_memory()
+        psutil.cpu_percent(None, True)
+        cpu_last = psutil.cpu_times(True)
+        last_time = time.monotonic()
+
+        sin_index = swap_last._fields.index("sin")
+        sout_index = swap_last._fields.index("sout")
+
+        sleep_interval = poll_interval
+
+        while not _poll(pipe, poll_interval=sleep_interval):
+            io = get_disk_io_counters()
+            virt_mem = psutil.virtual_memory()
+            swap_mem = psutil.swap_memory()
+            cpu_percent = psutil.cpu_percent(None, True)
+            cpu_times = psutil.cpu_times(True)
+            # Take the timestamp as soon as possible after getting cpu_times
+            # to reduce the likelihood of our process being interrupted between
+            # the two instructions. Having a delayed timestamp would cause the
+            # next sample to report more than 100% CPU time.
+            measured_end_time = time.monotonic()
+
+            # TODO Does this wrap? At 32 bits? At 64 bits?
+            # TODO Consider patching "delta" API to upstream.
+            io_diff = [v - io_last[i] for i, v in enumerate(io)]
+            io_last = io
+
+            cpu_diff = []
+            for core, values in enumerate(cpu_times):
+                cpu_diff.append([v - cpu_last[core][i] for i, v in enumerate(values)])
+
+            cpu_last = cpu_times
+
+            swap_entry = list(swap_mem)
+            swap_entry[sin_index] = swap_mem.sin - swap_last.sin
+            swap_entry[sout_index] = swap_mem.sout - swap_last.sout
+            swap_last = swap_mem
+
+            data.append(
+                (
+                    last_time,
+                    measured_end_time,
+                    io_diff,
+                    cpu_diff,
+                    cpu_percent,
+                    list(virt_mem),
+                    swap_entry,
+                )
+            )
+
+            collection_overhead = time.monotonic() - last_time - sleep_interval
+            last_time = measured_end_time
+            sleep_interval = max(poll_interval / 2, poll_interval - collection_overhead)
+
+    except Exception as e:
+        warnings.warn("_collect failed: %s" % e)
+
+    finally:
+        for entry in data:
+            pipe.send(entry)
+
+        pipe.send(("done", None, None, None, None, None, None))
+        pipe.close()
+
+    sys.exit(0)
+
+
+SystemResourceUsage = namedtuple(
+    "SystemResourceUsage",
+    ["start", "end", "cpu_times", "cpu_percent", "io", "virt", "swap"],
+)
+
+
+class SystemResourceMonitor(object):
+    """Measures system resources.
+
+    Each instance measures system resources from the time it is started
+    until it is finished. It does this on a separate process so it doesn't
+    impact execution of the main Python process.
+
+    Each instance is a one-shot instance. It cannot be used to record multiple
+    durations.
+
+    Aside from basic data gathering, the class supports basic analysis
+    capabilities. You can query for data between ranges. You can also tell it
+    when certain events occur and later grab data relevant to those events or
+    plot those events on a timeline.
+
+    The resource monitor works by periodically polling the state of the
+    system. By default, it polls every second. This can be adjusted depending
+    on the required granularity of the data and considerations for probe
+    overhead. It tries to probe at the interval specified. However, variations
+    should be expected. Fast and well-behaving systems should experience
+    variations in the 1ms range. Larger variations may exist if the system is
+    under heavy load or depending on how accurate socket polling is on your
+    system.
+
+    In its current implementation, data is not available until collection has
+    stopped. This may change in future iterations.
+
+    Usage
+    =====
+
+    monitor = SystemResourceMonitor()
+    monitor.start()
+
+    # Record that a single event in time just occurred.
+    foo.do_stuff()
+    monitor.record_event('foo_did_stuff')
+
+    # Record that we're about to perform a possibly long-running event.
+    with monitor.phase('long_job'):
+        foo.do_long_running_job()
+
+    # Stop recording. Currently we need to stop before data is available.
+    monitor.stop()
+
+    # Obtain the raw data for the entire probed range.
+    print('CPU Usage:')
+    for core in monitor.aggregate_cpu_percent():
+        print(core)
+
+    # We can also request data corresponding to a specific phase.
+    for data in monitor.phase_usage('long_job'):
+        print(data.cpu_percent)
+    """
+
+    # The interprocess communication is complicated enough to warrant
+    # explanation. To work around the Python GIL, we launch a separate
+    # background process whose only job is to collect metrics. If we performed
+    # collection in the main process, the polling interval would be
+    # inconsistent if a long-running function were on the stack. Since the
+    # child process is independent of the instantiating process, data
+    # collection should be evenly spaced.
+    #
+    # As the child process collects data, it buffers it locally.
When + # collection stops, it flushes all that data to a pipe to be read by + # the parent process. + + instance = None + + def __init__(self, poll_interval=1.0, metadata={}): + """Instantiate a system resource monitor instance. + + The instance is configured with a poll interval. This is the interval + between samples, in float seconds. + """ + self.start_time = None + self.end_time = None + + self.events = [] + self.markers = [] + self.phases = OrderedDict() + + self._active_phases = {} + self._active_markers = {} + + self._running = False + self._stopped = False + self._process = None + + if psutil is None: + return + + # This try..except should not be needed! However, some tools (like + # |mach build|) attempt to load psutil before properly creating a + # virtualenv by building psutil. As a result, python/psutil may be in + # sys.path and its .py files may pick up the psutil C extension from + # the system install. If the versions don't match, we typically see + # failures invoking one of these functions. + try: + cpu_percent = psutil.cpu_percent(0.0, True) + cpu_times = psutil.cpu_times(False) + io = get_disk_io_counters() + virt = psutil.virtual_memory() + swap = psutil.swap_memory() + except Exception as e: + warnings.warn("psutil failed to run: %s" % e) + return + + self._cpu_cores = len(cpu_percent) + self._cpu_times_type = type(cpu_times) + self._cpu_times_len = len(cpu_times) + self._io_type = type(io) + self._io_len = len(io) + self._virt_type = type(virt) + self._virt_len = len(virt) + self._swap_type = type(swap) + self._swap_len = len(swap) + self.start_timestamp = time.time() + + self._pipe, child_pipe = multiprocessing.Pipe(True) + + self._process = multiprocessing.Process( + target=_collect, args=(child_pipe, poll_interval) + ) + self.poll_interval = poll_interval + self.metadata = metadata + + def __del__(self): + if self._running: + self._pipe.send(("terminate",)) + self._process.join() + + # Methods to control monitoring. + + def start(self): + """Start measuring system-wide CPU resource utilization. + + You should only call this once per instance. + """ + if not self._process: + return + + self._process.start() + self._running = True + self.start_time = time.monotonic() + SystemResourceMonitor.instance = self + + def stop(self): + """Stop measuring system-wide CPU resource utilization. + + You should call this if and only if you have called start(). You should + always pair a stop() with a start(). + + Currently, data is not available until you call stop(). + """ + if not self._process: + self._stopped = True + return + + self.stop_time = time.monotonic() + assert not self._stopped + + try: + self._pipe.send(("terminate",)) + except Exception: + pass + self._stopped = True + + self.measurements = [] + + # The child process will send each data sample over the pipe + # as a separate data structure. When it has finished sending + # samples, it sends a special "done" message to indicate it + # is finished. + + while _poll(self._pipe, poll_interval=0.1): + try: + ( + start_time, + end_time, + io_diff, + cpu_diff, + cpu_percent, + virt_mem, + swap_mem, + ) = self._pipe.recv() + except Exception as e: + warnings.warn("failed to receive data: %s" % e) + # Assume we can't recover + break + + # There should be nothing after the "done" message so + # terminate. 
+ if start_time == "done": + break + + try: + io = self._io_type(*io_diff) + virt = self._virt_type(*virt_mem) + swap = self._swap_type(*swap_mem) + cpu_times = [self._cpu_times_type(*v) for v in cpu_diff] + + self.measurements.append( + SystemResourceUsage( + start_time, end_time, cpu_times, cpu_percent, io, virt, swap + ) + ) + except Exception: + # We also can't recover, but output the data that caused the exception + warnings.warn( + "failed to read the received data: %s" + % str( + ( + start_time, + end_time, + io_diff, + cpu_diff, + cpu_percent, + virt_mem, + swap_mem, + ) + ) + ) + + break + + # We establish a timeout so we don't hang forever if the child + # process has crashed. + if self._running: + self._process.join(10) + if self._process.is_alive(): + self._process.terminate() + self._process.join(10) + + self._running = False + SystemResourceUsage.instance = None + self.end_time = time.monotonic() + + # Methods to record events alongside the monitored data. + + @staticmethod + def record_event(name): + """Record an event as occuring now. + + Events are actions that occur at a specific point in time. If you are + looking for an action that has a duration, see the phase API below. + """ + if SystemResourceMonitor.instance: + SystemResourceMonitor.instance.events.append((time.monotonic(), name)) + + @staticmethod + def record_marker(name, start, end, text): + """Record a marker with a duration and an optional text + + Markers are typically used to record when a single command happened. + For actions with a longer duration that justifies tracking resource use + see the phase API below. + """ + if SystemResourceMonitor.instance: + SystemResourceMonitor.instance.markers.append((name, start, end, text)) + + @staticmethod + def begin_marker(name, text, disambiguator=None): + if SystemResourceMonitor.instance: + id = name + ":" + text + if disambiguator: + id += ":" + disambiguator + SystemResourceMonitor.instance._active_markers[id] = time.monotonic() + + @staticmethod + def end_marker(name, text, disambiguator=None): + if not SystemResourceMonitor.instance: + return + end = time.monotonic() + id = name + ":" + text + if disambiguator: + id += ":" + disambiguator + if not id in SystemResourceMonitor.instance._active_markers: + return + start = SystemResourceMonitor.instance._active_markers.pop(id) + SystemResourceMonitor.instance.record_marker(name, start, end, text) + + @contextmanager + def phase(self, name): + """Context manager for recording an active phase.""" + self.begin_phase(name) + yield + self.finish_phase(name) + + def begin_phase(self, name): + """Record the start of a phase. + + Phases are actions that have a duration. Multiple phases can be active + simultaneously. Phases can be closed in any order. + + Keep in mind that if phases occur in parallel, it will become difficult + to isolate resource utilization specific to individual phases. + """ + assert name not in self._active_phases + + self._active_phases[name] = time.monotonic() + + def finish_phase(self, name): + """Record the end of a phase.""" + + assert name in self._active_phases + + phase = (self._active_phases[name], time.monotonic()) + self.phases[name] = phase + del self._active_phases[name] + + return phase[1] - phase[0] + + # Methods to query data. + + def range_usage(self, start=None, end=None): + """Obtain the usage data falling within the given time range. + + This is a generator of SystemResourceUsage. + + If no time range bounds are given, all data is returned. 
+ """ + if not self._stopped or self.start_time is None: + return + + if start is None: + start = self.start_time + + if end is None: + end = self.end_time + + for entry in self.measurements: + if entry.start < start: + continue + + if entry.end > end: + break + + yield entry + + def phase_usage(self, phase): + """Obtain usage data for a specific phase. + + This is a generator of SystemResourceUsage. + """ + time_start, time_end = self.phases[phase] + + return self.range_usage(time_start, time_end) + + def between_events_usage(self, start_event, end_event): + """Obtain usage data between two point events. + + This is a generator of SystemResourceUsage. + """ + start_time = None + end_time = None + + for t, name in self.events: + if name == start_event: + start_time = t + elif name == end_event: + end_time = t + + if start_time is None: + raise Exception("Could not find start event: %s" % start_event) + + if end_time is None: + raise Exception("Could not find end event: %s" % end_event) + + return self.range_usage(start_time, end_time) + + def aggregate_cpu_percent(self, start=None, end=None, phase=None, per_cpu=True): + """Obtain the aggregate CPU percent usage for a range. + + Returns a list of floats representing average CPU usage percentage per + core if per_cpu is True (the default). If per_cpu is False, return a + single percentage value. + + By default this will return data for the entire instrumented interval. + If phase is defined, data for a named phase will be returned. If start + and end are defined, these times will be fed into range_usage(). + """ + cpu = [[] for i in range(0, self._cpu_cores)] + + if phase: + data = self.phase_usage(phase) + else: + data = self.range_usage(start, end) + + for usage in data: + for i, v in enumerate(usage.cpu_percent): + cpu[i].append(v) + + samples = len(cpu[0]) + + if not samples: + return 0 + + if per_cpu: + # pylint --py3k W1619 + return [sum(x) / samples for x in cpu] + + cores = [sum(x) for x in cpu] + + # pylint --py3k W1619 + return sum(cores) / len(cpu) / samples + + def aggregate_cpu_times(self, start=None, end=None, phase=None, per_cpu=True): + """Obtain the aggregate CPU times for a range. + + If per_cpu is True (the default), this returns a list of named tuples. + Each tuple is as if it were returned by psutil.cpu_times(). If per_cpu + is False, this returns a single named tuple of the aforementioned type. + """ + empty = [0 for i in range(0, self._cpu_times_len)] + cpu = [list(empty) for i in range(0, self._cpu_cores)] + + if phase: + data = self.phase_usage(phase) + else: + data = self.range_usage(start, end) + + for usage in data: + for i, core_values in enumerate(usage.cpu_times): + for j, v in enumerate(core_values): + cpu[i][j] += v + + if per_cpu: + return [self._cpu_times_type(*v) for v in cpu] + + sums = list(empty) + for core in cpu: + for i, v in enumerate(core): + sums[i] += v + + return self._cpu_times_type(*sums) + + def aggregate_io(self, start=None, end=None, phase=None): + """Obtain aggregate I/O counters for a range. + + Returns an iostat named tuple from psutil. + """ + + io = [0 for i in range(self._io_len)] + + if phase: + data = self.phase_usage(phase) + else: + data = self.range_usage(start, end) + + for usage in data: + for i, v in enumerate(usage.io): + io[i] += v + + return self._io_type(*io) + + def min_memory_available(self, start=None, end=None, phase=None): + """Return the minimum observed available memory number from a range. + + Returns long bytes of memory available. 
+ + See psutil for notes on how this is calculated. + """ + if phase: + data = self.phase_usage(phase) + else: + data = self.range_usage(start, end) + + values = [] + + for usage in data: + values.append(usage.virt.available) + + return min(values) + + def max_memory_percent(self, start=None, end=None, phase=None): + """Returns the maximum percentage of system memory used. + + Returns a float percentage. 1.00 would mean all system memory was in + use at one point. + """ + if phase: + data = self.phase_usage(phase) + else: + data = self.range_usage(start, end) + + values = [] + + for usage in data: + values.append(usage.virt.percent) + + return max(values) + + def as_dict(self): + """Convert the recorded data to a dict, suitable for serialization. + + The returned dict has the following keys: + + version - Integer version number being rendered. Currently 2. + cpu_times_fields - A list of the names of the CPU times fields. + io_fields - A list of the names of the I/O fields. + virt_fields - A list of the names of the virtual memory fields. + swap_fields - A list of the names of the swap memory fields. + samples - A list of dicts containing low-level measurements. + events - A list of lists representing point events. The inner list + has 2 elements, the float wall time of the event and the string + event name. + phases - A list of dicts describing phases. Each phase looks a lot + like an entry from samples (see below). Some phases may not have + data recorded against them, so some keys may be None. + overall - A dict representing overall resource usage. This resembles + a sample entry. + system - Contains additional information about the system including + number of processors and amount of memory. + + Each entry in the sample list is a dict with the following keys: + + start - Float wall time this measurement began on. + end - Float wall time this measurement ended on. + io - List of numerics for I/O values. + virt - List of numerics for virtual memory values. + swap - List of numerics for swap memory values. + cpu_percent - List of floats representing CPU percent on each core. + cpu_times - List of lists. Main list is each core. Inner lists are + lists of floats representing CPU times on that core. + cpu_percent_mean - Float of mean CPU percent across all cores. + cpu_times_sum - List of floats representing the sum of CPU times + across all cores. + cpu_times_total - Float representing the sum of all CPU times across + all cores. This is useful for calculating the percent in each CPU + time. 
+ """ + + o = dict( + version=2, + cpu_times_fields=list(self._cpu_times_type._fields), + io_fields=list(self._io_type._fields), + virt_fields=list(self._virt_type._fields), + swap_fields=list(self._swap_type._fields), + samples=[], + phases=[], + system={}, + ) + + def populate_derived(e): + if e["cpu_percent_cores"]: + # pylint --py3k W1619 + e["cpu_percent_mean"] = sum(e["cpu_percent_cores"]) / len( + e["cpu_percent_cores"] + ) + else: + e["cpu_percent_mean"] = None + + if e["cpu_times"]: + e["cpu_times_sum"] = [0.0] * self._cpu_times_len + for i in range(0, self._cpu_times_len): + e["cpu_times_sum"][i] = sum(core[i] for core in e["cpu_times"]) + + e["cpu_times_total"] = sum(e["cpu_times_sum"]) + + def phase_entry(name, start, end): + e = dict( + name=name, + start=start, + end=end, + duration=end - start, + cpu_percent_cores=self.aggregate_cpu_percent(phase=name), + cpu_times=[list(c) for c in self.aggregate_cpu_times(phase=name)], + io=list(self.aggregate_io(phase=name)), + ) + populate_derived(e) + return e + + for m in self.measurements: + e = dict( + start=m.start, + end=m.end, + io=list(m.io), + virt=list(m.virt), + swap=list(m.swap), + cpu_percent_cores=list(m.cpu_percent), + cpu_times=list(list(cpu) for cpu in m.cpu_times), + ) + + populate_derived(e) + o["samples"].append(e) + + if o["samples"]: + o["start"] = o["samples"][0]["start"] + o["end"] = o["samples"][-1]["end"] + o["duration"] = o["end"] - o["start"] + o["overall"] = phase_entry(None, o["start"], o["end"]) + else: + o["start"] = None + o["end"] = None + o["duration"] = None + o["overall"] = None + + o["events"] = [list(ev) for ev in self.events] + + for phase, v in self.phases.items(): + o["phases"].append(phase_entry(phase, v[0], v[1])) + + if have_psutil: + o["system"].update( + dict( + cpu_logical_count=psutil.cpu_count(logical=True), + cpu_physical_count=psutil.cpu_count(logical=False), + swap_total=psutil.swap_memory()[0], + vmem_total=psutil.virtual_memory()[0], + ) + ) + + return o + + def as_profile(self): + profile_time = time.monotonic() + start_time = self.start_time + profile = { + "meta": { + "processType": 0, + "product": "mach", + "stackwalk": 0, + "version": 27, + "preprocessedProfileVersion": 47, + "symbolicationNotSupported": True, + "interval": self.poll_interval * 1000, + "startTime": self.start_timestamp * 1000, + "profilingStartTime": 0, + "logicalCPUs": psutil.cpu_count(logical=True), + "physicalCPUs": psutil.cpu_count(logical=False), + "mainMemory": psutil.virtual_memory()[0], + "markerSchema": [ + { + "name": "Phase", + "tooltipLabel": "{marker.data.phase}", + "tableLabel": "{marker.name} — {marker.data.phase} — CPU time: {marker.data.cpuTime} ({marker.data.cpuPercent})", + "chartLabel": "{marker.data.phase}", + "display": [ + "marker-chart", + "marker-table", + "timeline-overview", + ], + "data": [ + { + "key": "cpuTime", + "label": "CPU Time", + "format": "duration", + }, + { + "key": "cpuPercent", + "label": "CPU Percent", + "format": "string", + }, + ], + }, + { + "name": "Text", + "tooltipLabel": "{marker.name}", + "tableLabel": "{marker.name} — {marker.data.text}", + "chartLabel": "{marker.data.text}", + "display": ["marker-chart", "marker-table"], + "data": [ + { + "key": "text", + "label": "Description", + "format": "string", + "searchable": True, + } + ], + }, + { + "name": "Mem", + "tooltipLabel": "{marker.name}", + "display": [], + "data": [ + {"key": "used", "label": "Memory Used", "format": "bytes"}, + { + "key": "cached", + "label": "Memory cached", + "format": "bytes", + }, + { 
+ "key": "buffers", + "label": "Memory buffers", + "format": "bytes", + }, + ], + "graphs": [ + {"key": "used", "color": "orange", "type": "line-filled"} + ], + }, + { + "name": "IO", + "tooltipLabel": "{marker.name}", + "display": [], + "data": [ + { + "key": "write_bytes", + "label": "Written", + "format": "bytes", + }, + { + "key": "write_count", + "label": "Write count", + "format": "integer", + }, + {"key": "read_bytes", "label": "Read", "format": "bytes"}, + { + "key": "read_count", + "label": "Read count", + "format": "integer", + }, + ], + "graphs": [ + {"key": "read_bytes", "color": "green", "type": "bar"}, + {"key": "write_bytes", "color": "red", "type": "bar"}, + ], + }, + ], + "usesOnlyOneStackType": True, + }, + "libs": [], + "threads": [ + { + "processType": "default", + "processName": "mach", + "processStartupTime": 0, + "processShutdownTime": None, + "registerTime": 0, + "unregisterTime": None, + "pausedRanges": [], + "showMarkersInTimeline": True, + "name": "", + "isMainThread": False, + "pid": "0", + "tid": 0, + "samples": { + "weightType": "samples", + "weight": None, + "stack": [], + "time": [], + "length": 0, + }, + "stringArray": ["(root)"], + "markers": { + "data": [], + "name": [], + "startTime": [], + "endTime": [], + "phase": [], + "category": [], + "length": 0, + }, + "stackTable": { + "frame": [0], + "prefix": [None], + "category": [0], + "subcategory": [0], + "length": 1, + }, + "frameTable": { + "address": [-1], + "inlineDepth": [0], + "category": [None], + "subcategory": [0], + "func": [0], + "nativeSymbol": [None], + "innerWindowID": [0], + "implementation": [None], + "line": [None], + "column": [None], + "length": 1, + }, + "funcTable": { + "isJS": [False], + "relevantForJS": [False], + "name": [0], + "resource": [-1], + "fileName": [None], + "lineNumber": [None], + "columnNumber": [None], + "length": 1, + }, + "resourceTable": { + "lib": [], + "name": [], + "host": [], + "type": [], + "length": 0, + }, + "nativeSymbols": { + "libIndex": [], + "address": [], + "name": [], + "functionSize": [], + "length": 0, + }, + } + ], + "counters": [], + } + + firstThread = profile["threads"][0] + markers = firstThread["markers"] + for key in self.metadata: + profile["meta"][key] = self.metadata[key] + + def get_string_index(string): + stringArray = firstThread["stringArray"] + try: + return stringArray.index(string) + except ValueError: + stringArray.append(string) + return len(stringArray) - 1 + + def add_marker(name_index, start, end, data, precision=None): + # The precision argument allows setting how many digits after the + # decimal point are desired. + # For resource use samples where we sample with a timer, an integer + # number of ms is good enough. + # For short duration markers, the profiler front-end may show up to + # 3 digits after the decimal point (ie. µs precision). + markers["startTime"].append(round((start - start_time) * 1000, precision)) + if end is None: + markers["endTime"].append(None) + # 0 = Instant marker + markers["phase"].append(0) + else: + markers["endTime"].append(round((end - start_time) * 1000, precision)) + # 1 = marker with start and end times, 2 = start but no end. 
+ markers["phase"].append(1) + markers["category"].append(0) + markers["name"].append(name_index) + markers["data"].append(data) + markers["length"] = markers["length"] + 1 + + def format_percent(value): + return str(round(value, 1)) + "%" + + samples = firstThread["samples"] + samples["stack"].append(0) + samples["time"].append(0) + + cpu_string_index = get_string_index("CPU Use") + memory_string_index = get_string_index("Memory") + io_string_index = get_string_index("IO") + valid_cpu_fields = set() + for m in self.measurements: + # Ignore samples that are much too short. + if m.end - m.start < self.poll_interval / 10: + continue + + # Sample times + samples["stack"].append(0) + samples["time"].append(round((m.end - start_time) * 1000)) + + # CPU + markerData = { + "type": "CPU", + "cpuPercent": format_percent( + sum(list(m.cpu_percent)) / len(m.cpu_percent) + ), + } + + # due to inconsistencies in the sampling rate, sometimes the + # cpu_times add up to more than 100%, causing annoying + # spikes in the CPU use charts. Avoid them by dividing the + # values by the total if it is above 1. + total = 0 + for field in ["nice", "user", "system", "iowait", "softirq", "idle"]: + if hasattr(m.cpu_times[0], field): + total += sum(getattr(core, field) for core in m.cpu_times) / ( + m.end - m.start + ) + divisor = total if total > 1 else 1 + + total = 0 + for field in ["nice", "user", "system", "iowait", "softirq"]: + if hasattr(m.cpu_times[0], field): + total += ( + sum(getattr(core, field) for core in m.cpu_times) + / (m.end - m.start) + / divisor + ) + if total > 0: + valid_cpu_fields.add(field) + markerData[field] = round(total, 3) + for field in ["nice", "user", "system", "iowait", "idle"]: + if hasattr(m.cpu_times[0], field): + markerData[field + "_pct"] = format_percent( + 100 + * sum(getattr(core, field) for core in m.cpu_times) + / (m.end - m.start) + / len(m.cpu_times) + ) + add_marker(cpu_string_index, m.start, m.end, markerData) + + # Memory + markerData = {"type": "Mem", "used": m.virt.used} + if hasattr(m.virt, "cached"): + markerData["cached"] = m.virt.cached + if hasattr(m.virt, "buffers"): + markerData["buffers"] = m.virt.buffers + add_marker(memory_string_index, m.start, m.end, markerData) + + # IO + markerData = { + "type": "IO", + "read_count": m.io.read_count, + "read_bytes": m.io.read_bytes, + "write_count": m.io.write_count, + "write_bytes": m.io.write_bytes, + } + add_marker(io_string_index, m.start, m.end, markerData) + samples["length"] = len(samples["stack"]) + + # The marker schema for CPU markers should only contain graph + # definitions for fields we actually have, or the profiler front-end + # will detect missing data and skip drawing the track entirely. 
+ cpuSchema = { + "name": "CPU", + "tooltipLabel": "{marker.name}", + "display": [], + "data": [{"key": "cpuPercent", "label": "CPU Percent", "format": "string"}], + "graphs": [], + } + cpuData = cpuSchema["data"] + for field, label in { + "user": "User %", + "iowait": "IO Wait %", + "system": "System %", + "nice": "Nice %", + "idle": "Idle %", + }.items(): + if field in valid_cpu_fields or field == "idle": + cpuData.append( + {"key": field + "_pct", "label": label, "format": "string"} + ) + cpuGraphs = cpuSchema["graphs"] + for field, color in { + "softirq": "orange", + "iowait": "red", + "system": "grey", + "user": "yellow", + "nice": "blue", + }.items(): + if field in valid_cpu_fields: + cpuGraphs.append({"key": field, "color": color, "type": "bar"}) + profile["meta"]["markerSchema"].insert(0, cpuSchema) + + # Create markers for phases + phase_string_index = get_string_index("Phase") + for phase, v in self.phases.items(): + markerData = {"type": "Phase", "phase": phase} + + cpu_percent_cores = self.aggregate_cpu_percent(phase=phase) + if cpu_percent_cores: + markerData["cpuPercent"] = format_percent( + sum(cpu_percent_cores) / len(cpu_percent_cores) + ) + + cpu_times = [list(c) for c in self.aggregate_cpu_times(phase=phase)] + cpu_times_sum = [0.0] * self._cpu_times_len + for i in range(0, self._cpu_times_len): + cpu_times_sum[i] = sum(core[i] for core in cpu_times) + total_cpu_time_ms = sum(cpu_times_sum) * 1000 + if total_cpu_time_ms > 0: + markerData["cpuTime"] = total_cpu_time_ms + + add_marker(phase_string_index, v[0], v[1], markerData, 3) + + # Add generic markers + for name, start, end, text in self.markers: + markerData = {"type": "Text"} + if text: + markerData["text"] = text + add_marker(get_string_index(name), start, end, markerData, 3) + if self.events: + event_string_index = get_string_index("Event") + for event_time, text in self.events: + if text: + add_marker( + event_string_index, + event_time, + None, + {"type": "Text", "text": text}, + 3, + ) + + # We may have spent some time generating this profile, and there might + # also have been some time elapsed between stopping the resource + # monitor, and the profile being created. These are hidden costs that + # we should account for as best as possible, and the best we can do + # is to make the profile contain information about this cost somehow. + # We extend the profile end time up to now rather than self.end_time, + # and add a phase covering that period of time. + now = time.monotonic() + profile["meta"]["profilingEndTime"] = round( + (now - self.start_time) * 1000 + 0.0005, 3 + ) + markerData = { + "type": "Phase", + "phase": "teardown", + } + add_marker(phase_string_index, self.stop_time, now, markerData, 3) + teardown_string_index = get_string_index("resourcemonitor") + markerData = { + "type": "Text", + "text": "stop", + } + add_marker(teardown_string_index, self.stop_time, self.end_time, markerData, 3) + markerData = { + "type": "Text", + "text": "as_profile", + } + add_marker(teardown_string_index, profile_time, now, markerData, 3) + + # Unfortunately, whatever the caller does with the profile (e.g. json) + # or after that (hopefully, exit) is not going to be counted, but we + # assume it's fast enough. 
+ return profile diff --git a/testing/mozbase/mozsystemmonitor/setup.cfg b/testing/mozbase/mozsystemmonitor/setup.cfg new file mode 100644 index 0000000000..3c6e79cf31 --- /dev/null +++ b/testing/mozbase/mozsystemmonitor/setup.cfg @@ -0,0 +1,2 @@ +[bdist_wheel] +universal=1 diff --git a/testing/mozbase/mozsystemmonitor/setup.py b/testing/mozbase/mozsystemmonitor/setup.py new file mode 100644 index 0000000000..7b575ef4bd --- /dev/null +++ b/testing/mozbase/mozsystemmonitor/setup.py @@ -0,0 +1,33 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import os + +from setuptools import setup + +PACKAGE_VERSION = "1.0.1" + +try: + pwd = os.path.dirname(os.path.abspath(__file__)) + description = open(os.path.join(pwd, "README.rst")).read() +except Exception: + description = "" + +setup( + name="mozsystemmonitor", + description="Monitor system resource usage.", + long_description="see https://firefox-source-docs.mozilla.org/mozbase/index.html", + classifiers=[ + "Programming Language :: Python :: 3", + ], + # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers + license="MPL 2.0", + keywords="mozilla", + author="Mozilla Automation and Tools Team", + author_email="tools@lists.mozilla.org", + url="https://wiki.mozilla.org/Auto-tools/Projects/Mozbase", + packages=["mozsystemmonitor"], + version=PACKAGE_VERSION, + install_requires=["psutil >= 3.1.1"], +) diff --git a/testing/mozbase/mozsystemmonitor/tests/manifest.toml b/testing/mozbase/mozsystemmonitor/tests/manifest.toml new file mode 100644 index 0000000000..babb20c1b0 --- /dev/null +++ b/testing/mozbase/mozsystemmonitor/tests/manifest.toml @@ -0,0 +1,4 @@ +[DEFAULT] +subsuite = "mozbase" + +["test_resource_monitor.py"] diff --git a/testing/mozbase/mozsystemmonitor/tests/test_resource_monitor.py b/testing/mozbase/mozsystemmonitor/tests/test_resource_monitor.py new file mode 100644 index 0000000000..6de99152b9 --- /dev/null +++ b/testing/mozbase/mozsystemmonitor/tests/test_resource_monitor.py @@ -0,0 +1,183 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import multiprocessing +import time +import unittest + +import mozunit +from six import integer_types + +try: + import psutil +except ImportError: + psutil = None + +from mozsystemmonitor.resourcemonitor import SystemResourceMonitor, SystemResourceUsage + + +@unittest.skipIf(psutil is None, "Resource monitor requires psutil.") +class TestResourceMonitor(unittest.TestCase): + def test_basic(self): + monitor = SystemResourceMonitor(poll_interval=0.5) + + monitor.start() + time.sleep(3) + + monitor.stop() + + data = list(monitor.range_usage()) + self.assertGreater(len(data), 3) + + self.assertIsInstance(data[0], SystemResourceUsage) + + def test_empty(self): + monitor = SystemResourceMonitor(poll_interval=2.0) + monitor.start() + monitor.stop() + + data = list(monitor.range_usage()) + self.assertEqual(len(data), 0) + + def test_phases(self): + monitor = SystemResourceMonitor(poll_interval=0.25) + + monitor.start() + time.sleep(1) + + with monitor.phase("phase1"): + time.sleep(1) + + with monitor.phase("phase2"): + time.sleep(1) + + monitor.stop() + + self.assertEqual(len(monitor.phases), 2) + self.assertEqual(["phase2", "phase1"], list(monitor.phases.keys())) + + all = list(monitor.range_usage()) + data1 = list(monitor.phase_usage("phase1")) + data2 = list(monitor.phase_usage("phase2")) + + self.assertGreater(len(all), len(data1)) + self.assertGreater(len(data1), len(data2)) + + # This could fail if time.monotonic() takes more than 0.1s. It really + # shouldn't. + self.assertAlmostEqual(data1[-1].end, data2[-1].end, delta=0.25) + + def test_no_data(self): + monitor = SystemResourceMonitor() + + data = list(monitor.range_usage()) + self.assertEqual(len(data), 0) + + def test_events(self): + monitor = SystemResourceMonitor(poll_interval=0.25) + + monitor.start() + time.sleep(0.5) + + t0 = time.monotonic() + monitor.record_event("t0") + time.sleep(2) + + monitor.record_event("t1") + time.sleep(0.5) + monitor.stop() + + events = monitor.events + self.assertEqual(len(events), 2) + + event = events[0] + + self.assertEqual(event[1], "t0") + self.assertAlmostEqual(event[0], t0, delta=0.25) + + data = list(monitor.between_events_usage("t0", "t1")) + self.assertGreater(len(data), 0) + + def test_aggregate_cpu(self): + monitor = SystemResourceMonitor(poll_interval=0.25) + + monitor.start() + time.sleep(1) + monitor.stop() + + values = monitor.aggregate_cpu_percent() + self.assertIsInstance(values, list) + self.assertEqual(len(values), multiprocessing.cpu_count()) + for v in values: + self.assertIsInstance(v, float) + + value = monitor.aggregate_cpu_percent(per_cpu=False) + self.assertIsInstance(value, float) + + values = monitor.aggregate_cpu_times() + self.assertIsInstance(values, list) + self.assertGreater(len(values), 0) + self.assertTrue(hasattr(values[0], "user")) + + t = type(values[0]) + + value = monitor.aggregate_cpu_times(per_cpu=False) + self.assertIsInstance(value, t) + + def test_aggregate_io(self): + monitor = SystemResourceMonitor(poll_interval=0.25) + + # There's really no easy way to ensure I/O occurs. For all we know + # reads and writes will all be serviced by the page cache. 
+ monitor.start() + time.sleep(1.0) + monitor.stop() + + values = monitor.aggregate_io() + self.assertTrue(hasattr(values, "read_count")) + + def test_memory(self): + monitor = SystemResourceMonitor(poll_interval=0.25) + + monitor.start() + time.sleep(1.0) + monitor.stop() + + v = monitor.min_memory_available() + self.assertIsInstance(v, integer_types) + + v = monitor.max_memory_percent() + self.assertIsInstance(v, float) + + def test_as_dict(self): + monitor = SystemResourceMonitor(poll_interval=0.25) + + monitor.start() + time.sleep(0.1) + monitor.begin_phase("phase1") + monitor.record_event("foo") + time.sleep(0.1) + monitor.begin_phase("phase2") + monitor.record_event("bar") + time.sleep(0.2) + monitor.finish_phase("phase1") + time.sleep(0.2) + monitor.finish_phase("phase2") + time.sleep(0.4) + monitor.stop() + + d = monitor.as_dict() + + self.assertEqual(d["version"], 2) + self.assertEqual(len(d["events"]), 2) + self.assertEqual(len(d["phases"]), 2) + self.assertIn("system", d) + self.assertIsInstance(d["system"], dict) + self.assertIsInstance(d["overall"], dict) + self.assertIn("duration", d["overall"]) + self.assertIn("cpu_times", d["overall"]) + + +if __name__ == "__main__": + mozunit.main() -- cgit v1.2.3
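Beyond the unit tests above, the collected data is typically consumed by
serializing it once monitoring has stopped. Here is a small sketch under the
assumption that plain JSON files are an acceptable sink; the file names are
illustrative only, and as_dict()/as_profile() are the methods documented
earlier in this patch (as_profile() emits a Gecko-profiler-style structure):

    import json
    import time

    from mozsystemmonitor.resourcemonitor import SystemResourceMonitor

    monitor = SystemResourceMonitor(poll_interval=0.25)
    monitor.start()
    time.sleep(1)  # stand-in for the monitored workload
    monitor.stop()

    # Version-2 dict described in as_dict(); suitable for json.dump().
    with open("resource_usage.json", "w") as fh:
        json.dump(monitor.as_dict(), fh)

    # Profiler-style output with markers for CPU, memory, I/O and phases.
    with open("resource_profile.json", "w") as fh:
        json.dump(monitor.as_profile(), fh)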