summary | refs | log | tree | commit | diff | stats
path: root/python/mozperftest/mozperftest/metrics/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/mozperftest/mozperftest/metrics/common.py')
-rw-r--r-- python/mozperftest/mozperftest/metrics/common.py | 356
1 files changed, 356 insertions, 0 deletions
diff --git a/python/mozperftest/mozperftest/metrics/common.py b/python/mozperftest/mozperftest/metrics/common.py
new file mode 100644
index 0000000000..3598cd378a
--- /dev/null
+++ b/python/mozperftest/mozperftest/metrics/common.py
@@ -0,0 +1,356 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+from collections import defaultdict
+from pathlib import Path
+
+from mozperftest.metrics.exceptions import (
+ MetricsMissingResultsError,
+ MetricsMultipleTransformsError,
+)
+from mozperftest.metrics.notebook import PerftestETL
+from mozperftest.metrics.utils import metric_fields, validate_intermediate_results
+
# Command-line options shared by the metrics layers. Each key is an option
# name and each value holds the keyword arguments describing that option
# (type, nargs, default, help, ...).
COMMON_ARGS = {
    # Which metrics to pull out of the data; each entry is parsed by
    # `metric_fields`.
    "metrics": dict(
        type=metric_fields,
        nargs="*",
        default=[],
        help="The metrics that should be retrieved from the data.",
    ),
    # Prefix prepended to the names of the generated output files.
    "prefix": dict(
        type=str,
        default="",
        help="Prefix used by the output files.",
    ),
    # Optional metric whose values are used to partition the data.
    "split-by": dict(
        type=str,
        default=None,
        help=(
            "A metric name to use for splitting the data. For instance, "
            "using browserScripts.pageinfo.url will split the data by the unique "
            "URLs that are found."
        ),
    ),
    # Flag: shorten dotted metric names to their last component.
    "simplify-names": dict(
        action="store_true",
        default=False,
        help=(
            "If set, metric names will be simplified to a single word. The PerftestETL "
            "combines dictionary keys by `.`, and the final key contains that value of the data. "
            "That final key becomes the new name of the metric."
        ),
    ),
    # Substrings that protect a metric name from being simplified.
    "simplify-exclude": dict(
        nargs="*",
        default=["statistics"],
        help=(
            "When renaming/simplifying metric names, entries with these strings "
            "will be ignored and won't get simplified. These options are only used when "
            "--simplify-names is set."
        ),
    ),
    # Custom transformer, given as a file path or as `module:ClassName`.
    "transformer": dict(
        type=str,
        default=None,
        help=(
            "The path to the file containing the custom transformer, "
            "or the module to import along with the class name, "
            "e.g. mozperftest.test.xpcshell:XpcShellTransformer"
        ),
    ),
}
+
+
class MetricsStorage(object):
    """Holds data that is commonly used across all metrics layers.

    An instance of this class represents the data for a given output
    path and prefix.
    """

    def __init__(self, output_path, prefix, logger):
        """Create the storage and make sure the output directory exists.

        :param output_path str/Path: Directory where outputs are stored; it
            is created (including parents) if it does not exist.
        :param prefix str: Prefix used when naming the outputs.
        :param logger: Object exposing a `warning` method.
        """
        self.prefix = prefix
        self.output_path = output_path
        self.stddata = {}
        self.ptnb_config = {}
        self.results = []
        # Populated by `set_results`; defined here so the attribute
        # always exists.
        self.settings = {}
        self.logger = logger

        Path(output_path).mkdir(parents=True, exist_ok=True)

    def _parse_results(self, results):
        """Flatten `results` into a list of result entries.

        :param results dict/str/Path/list: A mapping is kept as-is (wrapped
            in a list), a file path is converted to its posix string, a
            directory is searched recursively for `*.json` files, and a
            list (or tuple) is flattened recursively.
        :return list: The collected mappings and/or posix file paths.
            Paths that do not exist are skipped with a warning.
        """
        if isinstance(results, dict):
            return [results]

        res = []
        # XXX we need to embrace pathlib everywhere.
        if isinstance(results, (str, Path)):
            # Expecting a single path or a directory
            p = Path(results)
            if not p.exists():
                self.logger.warning("Given path does not exist: {}".format(results))
            elif p.is_dir():
                files = [f for f in p.glob("**/*.json") if not f.is_dir()]
                res.extend(self._parse_results(files))
            else:
                res.append(p.as_posix())
        elif isinstance(results, (list, tuple)):
            # Expecting a sequence of paths
            for path in results:
                res.extend(self._parse_results(path))
        return res

    def set_results(self, results):
        """Processes and sets results provided by the metadata.

        Each entry's `results` can be a path to a file or a directory.
        Every file is scanned and we build a list. Alternatively, it
        can be a mapping containing the results, in that case
        we just use it directly, but keep it in a list.

        :param results list: Intermediate result entries. Each one is a
            mapping with at least a `name` and a `results` field, and
            optionally `transformer` and `transformer-options`.
        :raises NotImplementedError: If an entry's `results` is a dict
            (subtest-based processing).
        :raises MetricsMultipleTransformsError: If two entries sharing a
            name ask for different transformers.
        :raises MetricsMissingResultsError: If no results were provided.
        """
        # Parse the results into files (for now) and the settings
        self.results = defaultdict(lambda: defaultdict(list))
        self.settings = defaultdict(dict)
        for res in results:
            # Ensure that the results are valid before continuing
            validate_intermediate_results(res)

            name = res["name"]
            if isinstance(res["results"], dict):
                # XXX Implement subtest based parsing
                raise NotImplementedError(
                    "Subtest-based processing is not implemented yet"
                )

            # Merge all entries with the same name into one
            # result, if separation is needed use unique names
            self.results[name]["files"].extend(self._parse_results(res["results"]))

            # Everything except the raw results is kept as a setting
            suite_settings = self.settings[name]
            for key, val in res.items():
                if key == "results":
                    continue
                suite_settings[key] = val

            # Check the transform definitions. The requested transformer is
            # resolved once so that the error message below cannot fail
            # with a KeyError when the entry relies on the default.
            newtrfm = res.get("transformer", "SingleJsonRetriever")
            currtrfm = self.results[name]["transformer"]
            if not currtrfm:
                self.results[name]["transformer"] = newtrfm
            elif currtrfm != newtrfm:
                raise MetricsMultipleTransformsError(
                    f"Only one transformer allowed per data name! Found multiple for {name}: "
                    f"{[currtrfm, newtrfm]}"
                )

            # Get the transform options if available
            self.results[name]["options"] = res.get("transformer-options", {})

        if not self.results:
            self.return_code = 1
            raise MetricsMissingResultsError("Could not find any results to process.")

    def get_standardized_data(self, group_name="firefox", transformer=None):
        """Returns a parsed, standardized results data set.

        The dataset is computed once and then cached: as soon as some
        standardized data exists, it is returned as-is on later calls.
        The transformer dictates how the data will be parsed; when none
        is given, the transformer recorded for each data type by
        `set_results` is used.

        :param group_name str: The name for this results group
            (currently unused by this method).
        :param transformer str: The name of the transformer to use
            when parsing the data. Overrides the per-data-type one.
        :return dict: Standardized notebook data, keyed by data type.
        """
        if self.stddata:
            return self.stddata

        for data_type, data_info in self.results.items():
            tfm = transformer if transformer is not None else data_info["transformer"]
            prefix = data_type
            if self.prefix:
                prefix = "{}-{}".format(self.prefix, data_type)

            # Primarily used to store the transformer used on the data
            # so that it can also be used for generating things
            # like summary values for suites, and subtests.
            self.ptnb_config[data_type] = {
                "output": self.output_path,
                "prefix": prefix,
                "custom_transformer": tfm,
                "file_groups": {data_type: data_info["files"]},
            }

            ptnb = PerftestETL(
                file_groups=self.ptnb_config[data_type]["file_groups"],
                config=self.ptnb_config[data_type],
                prefix=self.prefix,
                logger=self.logger,
                custom_transform=tfm,
            )
            r = ptnb.process(**data_info["options"])
            self.stddata[data_type] = r["data"]

        return self.stddata

    def filtered_metrics(
        self,
        group_name="firefox",
        transformer=None,
        metrics=None,
        exclude=None,
        split_by=None,
        simplify_names=False,
        simplify_exclude=("statistics",),
    ):
        """Filters the metrics to only those that were requested by `metrics`.

        If metrics is Falsey (None, empty list, etc.) then no metrics
        will be filtered. The entries in metrics are pattern matched with
        the subtests in the standardized data (not a regular expression).
        For example, if "firstPaint" is in metrics, then all subtests which
        contain this string in their name will be kept.

        Note that the kept entries are the cached standardized entries
        themselves: they get a `transformer` field and, when
        `simplify_names` is set, their `subtest` is renamed in place.

        :param group_name str: Forwarded to `get_standardized_data`.
        :param transformer str: Forwarded to `get_standardized_data`.
        :param metrics list: List of metrics to keep; each entry is a
            mapping whose `name` is matched against the subtest names.
        :param exclude list: List of string matchers to exclude from the metrics
            gathered/reported.
        :param split_by str: The name of a metric to use to split up data by.
            If no subtest matches it, a warning is logged and the data is
            returned unsplit.
        :param simplify_names bool: If True, metric names are reduced to
            the part following their last `.`.
        :param simplify_exclude list: List of string matchers to exclude
            from the naming simplification process.
        :return dict: Standardized notebook data containing the
            requested metrics.
        """
        results = self.get_standardized_data(
            group_name=group_name, transformer=transformer
        )
        if not metrics:
            return results
        exclude = exclude or []
        simplify_exclude = simplify_exclude or []

        # Map each value of the splitting metric to the data indices that
        # produced it. Stays None when there is nothing to split by.
        split_groups = None
        if split_by is not None:
            splitting_entry = None
            for data_info in results.values():
                match = next(
                    (res for res in data_info if split_by in res["subtest"]), None
                )
                # A match found in a later data type replaces an earlier one.
                if match is not None:
                    splitting_entry = match

            if splitting_entry is None:
                self.logger.warning(
                    f"No metric matching `{split_by}` was found, "
                    "the data will not be split."
                )
            else:
                split_groups = defaultdict(list)
                for index, entry in enumerate(splitting_entry["data"]):
                    split_groups[entry["value"]].append(index)

        # Filter metrics
        filtered = {}
        for data_type, data_info in results.items():
            newresults = []
            for res in data_info:
                wanted = any(met["name"] in res["subtest"] for met in metrics)
                unwanted = any(met in res["subtest"] for met in exclude)
                if wanted and not unwanted:
                    res["transformer"] = self.ptnb_config[data_type][
                        "custom_transformer"
                    ]
                    newresults.append(res)
            filtered[data_type] = newresults

        # Simplify the filtered metric names
        if simplify_names:

            def _simplify(name):
                if any(met in name for met in simplify_exclude):
                    return None
                return name.split(".")[-1]

            # The second argument is unused by `_alter_name`.
            self._alter_name(filtered, None, filter=_simplify)

        # Split the filtered results
        if split_groups is not None:
            newfilt = {}
            total_iterations = sum(len(inds) for inds in split_groups.values())
            for data_type, entries in filtered.items():
                if not entries:
                    # Ignore empty data types
                    continue

                newresults = []
                newfilt[data_type] = newresults
                for split, indices in split_groups.items():
                    for res in entries:
                        if len(res["data"]) != total_iterations:
                            # Skip data that cannot be split
                            continue
                        splitres = dict(res)
                        # Formatting also copes with non-string split values
                        splitres["subtest"] = f"{res['subtest']} {split}"
                        splitres["data"] = [res["data"][i] for i in indices]
                        splitres["transformer"] = self.ptnb_config[data_type][
                            "custom_transformer"
                        ]

                        newresults.append(splitres)

            filtered = newfilt

        return filtered

    def _alter_name(self, filtered, res, filter):
        """Rename the subtests of `filtered` in place.

        :param filtered dict: Lists of result entries keyed by data type.
        :param res: Unused, kept for backward compatibility.
        :param filter callable: Takes the current subtest name and returns
            the new name, or None to leave the entry untouched.

        A new name is only applied the first time it is produced; later
        entries that would end up with the same name keep their original
        name and a warning is logged.
        """
        previous = set()
        for data_info in filtered.values():
            for entry in data_info:
                new = filter(entry["subtest"])
                if new is None:
                    continue
                if new in previous:
                    self.logger.warning(
                        f"Another metric which ends with `{new}` was already found. "
                        f"{entry['subtest']} will not be simplified."
                    )
                    continue
                entry["subtest"] = new
                previous.add(new)
+
+
# Cache of MetricsStorage instances, keyed by (path, prefix).
_metrics = {}


def filtered_metrics(
    metadata,
    path,
    prefix,
    group_name="firefox",
    transformer=None,
    metrics=None,
    settings=False,
    exclude=None,
    split_by=None,
    simplify_names=False,
    simplify_exclude=("statistics",),
):
    """Returns standardized data extracted from the metadata instance.

    We're caching an instance of MetricsStorage per path/prefix
    combination and compute the data only once when this function is called.
    A storage is only cached once its results were set successfully, so a
    failure while setting them is not hidden from later calls.

    :param metadata: Object providing `get_results()`; it is also handed
        to MetricsStorage as its logger.
    :param path str/Path: Output path of the storage.
    :param prefix str: Prefix of the storage.
    :param settings bool: If True, the settings of the storage are
        returned along with the results.
    :return dict/tuple: The filtered results, or a `(results, settings)`
        tuple when `settings` is True.

    The remaining parameters are forwarded unchanged to
    `MetricsStorage.filtered_metrics`.
    """
    key = path, prefix
    storage = _metrics.get(key)
    if storage is None:
        storage = MetricsStorage(path, prefix, metadata)
        storage.set_results(metadata.get_results())
        # Only cache fully initialised storages
        _metrics[key] = storage

    results = storage.filtered_metrics(
        group_name=group_name,
        transformer=transformer,
        metrics=metrics,
        exclude=exclude,
        split_by=split_by,
        simplify_names=simplify_names,
        simplify_exclude=simplify_exclude,
    )

    # XXX returning two different types is a problem
    # in case settings is false, we should return None for it
    # and always return a 2-tuple
    if settings:
        return results, storage.settings
    return results