diff options
Diffstat (limited to 'python/mozperftest/mozperftest/metrics/common.py')
-rw-r--r-- | python/mozperftest/mozperftest/metrics/common.py | 356 |
1 file changed, 356 insertions, 0 deletions
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from collections import defaultdict
from pathlib import Path

from mozperftest.metrics.exceptions import (
    MetricsMissingResultsError,
    MetricsMultipleTransformsError,
)
from mozperftest.metrics.notebook import PerftestETL
from mozperftest.metrics.utils import metric_fields, validate_intermediate_results

# Command-line argument definitions shared by all metrics layers.
# Each entry maps an option name to the kwargs used when registering it.
COMMON_ARGS = {
    "metrics": {
        "type": metric_fields,
        "nargs": "*",
        "default": [],
        "help": "The metrics that should be retrieved from the data.",
    },
    "prefix": {"type": str, "default": "", "help": "Prefix used by the output files."},
    "split-by": {
        "type": str,
        "default": None,
        "help": "A metric name to use for splitting the data. For instance, "
        "using browserScripts.pageinfo.url will split the data by the unique "
        "URLs that are found.",
    },
    "simplify-names": {
        "action": "store_true",
        "default": False,
        "help": "If set, metric names will be simplified to a single word. The PerftestETL "
        "combines dictionary keys by `.`, and the final key contains that value of the data. "
        "That final key becomes the new name of the metric.",
    },
    "simplify-exclude": {
        "nargs": "*",
        "default": ["statistics"],
        "help": "When renaming/simplifying metric names, entries with these strings "
        "will be ignored and won't get simplified. These options are only used when "
        "--simplify-names is set.",
    },
    "transformer": {
        "type": str,
        "default": None,
        "help": "The path to the file containing the custom transformer, "
        "or the module to import along with the class name, "
        "e.g. mozperftest.test.xpcshell:XpcShellTransformer",
    },
}


class MetricsStorage(object):
    """Holds data that is commonly used across all metrics layers.

    An instance of this class represents data for a given output
    path and prefix.
    """

    def __init__(self, output_path, prefix, logger):
        """Initialize the storage and create the output directory.

        :param output_path str: Directory where processed output is written
            (created, including parents, if it does not exist).
        :param prefix str: Prefix used to name the output files.
        :param logger: Object exposing at least a `warning` method
            (also handed to PerftestETL as its logger).
        """
        self.prefix = prefix
        self.output_path = output_path
        # Cache of the standardized data, filled once by
        # get_standardized_data() and then reused.
        self.stddata = {}
        # Per-data-type PerftestETL configuration, keyed by data name;
        # also records which transformer was applied to each data type.
        self.ptnb_config = {}
        # Replaced by a nested defaultdict in set_results().
        self.results = []
        self.logger = logger

        p = Path(output_path)
        p.mkdir(parents=True, exist_ok=True)

    def _parse_results(self, results):
        """Flatten `results` into a list of JSON file paths (or raw dicts).

        Accepts a dict (returned as a single-element list), a path to a
        file or directory (directories are searched recursively for
        `*.json` files), or a list of any of these. Non-existent paths
        are logged as warnings and skipped.

        :return list: Flat list of POSIX path strings (and/or dicts).
        """
        if isinstance(results, dict):
            return [results]
        res = []
        # XXX we need to embrace pathlib everywhere.
        if isinstance(results, (str, Path)):
            # Expecting a single path or a directory
            p = Path(results)
            if not p.exists():
                self.logger.warning("Given path does not exist: {}".format(results))
            elif p.is_dir():
                files = [f for f in p.glob("**/*.json") if not f.is_dir()]
                res.extend(self._parse_results(files))
            else:
                res.append(p.as_posix())
        if isinstance(results, list):
            # Expecting a list of paths
            for path in results:
                res.extend(self._parse_results(path))
        return res

    def set_results(self, results):
        """Processes and sets results provided by the metadata.

        `results` can be a path to a file or a directory. Every
        file is scanned and we build a list. Alternatively, it
        can be a mapping containing the results, in that case
        we just use it directly, but keep it in a list.

        :param results list/dict/str: Path, or list of paths to the data
            (or the data itself in a dict) of the data to be processed.
        :raises NotImplementedError: For subtest-shaped (dict) results.
        :raises MetricsMultipleTransformsError: If two entries with the
            same name request different transformers.
        :raises MetricsMissingResultsError: If no results were found.
        """
        # Parse the results into files (for now) and the settings
        self.results = defaultdict(lambda: defaultdict(list))
        self.settings = defaultdict(dict)
        for res in results:
            # Ensure that the results are valid before continuing
            validate_intermediate_results(res)

            name = res["name"]
            if isinstance(res["results"], dict):
                # XXX Implement subtest based parsing
                raise NotImplementedError(
                    "Subtest-based processing is not implemented yet"
                )

            # Merge all entries with the same name into one
            # result, if separation is needed use unique names
            self.results[name]["files"].extend(self._parse_results(res["results"]))

            # Keep every non-results field as a per-suite setting
            # (later exposed through `storage.settings`).
            suite_settings = self.settings[name]
            for key, val in res.items():
                if key == "results":
                    continue
                suite_settings[key] = val

            # Check the transform definitions.  The first entry for a name
            # fixes the transformer; later entries must agree with it.
            currtrfm = self.results[name]["transformer"]
            if not currtrfm:
                self.results[name]["transformer"] = res.get(
                    "transformer", "SingleJsonRetriever"
                )
            elif currtrfm != res.get("transformer", "SingleJsonRetriever"):
                raise MetricsMultipleTransformsError(
                    f"Only one transformer allowed per data name! Found multiple for {name}: "
                    f"{[currtrfm, res['transformer']]}"
                )

            # Get the transform options if available
            self.results[name]["options"] = res.get("transformer-options", {})

        if not self.results:
            self.return_code = 1
            raise MetricsMissingResultsError("Could not find any results to process.")

    def get_standardized_data(self, group_name="firefox", transformer=None):
        """Returns a parsed, standardized results data set.

        The dataset is computed once then cached (in `self.stddata`).
        The transformer dictates how the data will be parsed, by default it uses
        a JSON transformer that flattens the dictionary while merging all the
        common metrics together.

        :param group_name str: The name for this results group.
        :param transformer str: The name of the transformer to use
            when parsing the data. Currently, only SingleJsonRetriever
            is available. When given, it overrides the per-suite
            transformer recorded by set_results().
        :return dict: Standardized notebook data containing the
            requested metrics, keyed by data type.
        """
        # NOTE(review): `group_name` is accepted but never used in this
        # method — confirm whether it is kept only for API symmetry.
        if self.stddata:
            return self.stddata

        for data_type, data_info in self.results.items():
            tfm = transformer if transformer is not None else data_info["transformer"]
            prefix = data_type
            if self.prefix:
                prefix = "{}-{}".format(self.prefix, data_type)

            # Primarily used to store the transformer used on the data
            # so that it can also be used for generating things
            # like summary values for suites, and subtests.
            self.ptnb_config[data_type] = {
                "output": self.output_path,
                "prefix": prefix,
                "custom_transformer": tfm,
                "file_groups": {data_type: data_info["files"]},
            }

            ptnb = PerftestETL(
                file_groups=self.ptnb_config[data_type]["file_groups"],
                config=self.ptnb_config[data_type],
                prefix=self.prefix,
                logger=self.logger,
                custom_transform=tfm,
            )
            # Any "transformer-options" recorded in set_results() are
            # forwarded to the ETL process as keyword arguments.
            r = ptnb.process(**data_info["options"])
            self.stddata[data_type] = r["data"]

        return self.stddata

    def filtered_metrics(
        self,
        group_name="firefox",
        transformer=None,
        metrics=None,
        exclude=None,
        split_by=None,
        simplify_names=False,
        # NOTE(review): mutable default argument — it is never mutated
        # here, but a None default with a fallback would be safer.
        simplify_exclude=["statistics"],
    ):
        """Filters the metrics to only those that were requested by `metrics`.

        If metrics is Falsey (None, empty list, etc.) then no metrics
        will be filtered. The entries in metrics are pattern matched with
        the subtests in the standardized data (not a regular expression).
        For example, if "firstPaint" is in metrics, then all subtests which
        contain this string in their name will be kept.

        :param group_name str: The name for this results group.
        :param transformer str: The name of the transformer to use
            when parsing the data (see get_standardized_data).
        :param metrics list: List of metrics to keep.
        :param exclude list: List of string matchers to exclude from the metrics
            gathered/reported.
        :param split_by str: The name of a metric to use to split up data by.
        :param simplify_names bool: If True, metric names are shortened to
            their final `.`-separated component (see _alter_name).
        :param simplify_exclude list: List of string matchers to exclude
            from the naming simplification process.
        :return dict: Standardized notebook data containing the
            requested metrics.
        """
        results = self.get_standardized_data(
            group_name=group_name, transformer=transformer
        )
        if not metrics:
            return results
        if not exclude:
            exclude = []
        if not simplify_exclude:
            simplify_exclude = []

        # Get the field to split the results by (if any)
        if split_by is not None:
            splitting_entry = None
            for data_type, data_info in results.items():
                for res in data_info:
                    if split_by in res["subtest"]:
                        splitting_entry = res
                        break
            if splitting_entry is not None:
                # `split_by` is rebound from a metric name to a mapping of
                # {split value -> data indices with that value}.
                split_by = defaultdict(list)
                for c, entry in enumerate(splitting_entry["data"]):
                    split_by[entry["value"]].append(c)
            # NOTE(review): if no entry matches, `split_by` stays a str and
            # the `split_by.items()` calls below will raise AttributeError —
            # confirm whether it should be reset to None here instead.

        # Filter metrics: keep a subtest when any requested metric name is a
        # substring of it and no exclude matcher is.
        filtered = {}
        for data_type, data_info in results.items():
            newresults = []
            for res in data_info:
                if any([met["name"] in res["subtest"] for met in metrics]) and not any(
                    [met in res["subtest"] for met in exclude]
                ):
                    res["transformer"] = self.ptnb_config[data_type][
                        "custom_transformer"
                    ]
                    newresults.append(res)
            filtered[data_type] = newresults

        # Simplify the filtered metric names
        if simplify_names:

            def _simplify(name):
                # Names matching an exclusion pattern are left untouched.
                if any([met in name for met in simplify_exclude]):
                    return None
                return name.split(".")[-1]

            # NOTE(review): `res` here is the leaked loop variable from the
            # filtering loop above; _alter_name immediately rebinds it, so
            # the argument is effectively unused — confirm and clean up.
            self._alter_name(filtered, res, filter=_simplify)

        # Split the filtered results
        if split_by is not None:
            newfilt = {}
            # A result can only be split when it has exactly one data point
            # per (split value, iteration) pair.
            total_iterations = sum([len(inds) for _, inds in split_by.items()])
            for data_type in filtered:
                if not filtered[data_type]:
                    # Ignore empty data types
                    continue

                newresults = []
                newfilt[data_type] = newresults
                for split, indices in split_by.items():
                    for res in filtered[data_type]:
                        if len(res["data"]) != total_iterations:
                            # Skip data that cannot be split
                            continue
                        # Shallow-copy the entry, tag its subtest name with
                        # the split value and keep only the matching points.
                        splitres = {key: val for key, val in res.items()}
                        splitres["subtest"] += " " + split
                        splitres["data"] = [res["data"][i] for i in indices]
                        splitres["transformer"] = self.ptnb_config[data_type][
                            "custom_transformer"
                        ]

                        newresults.append(splitres)

            filtered = newfilt

        return filtered

    def _alter_name(self, filtered, res, filter):
        """Rename subtests in-place using `filter`.

        `filter` maps an existing subtest name to a new one, or to None to
        leave it unchanged. A simplified name is only applied once; later
        collisions are logged and skipped so metrics stay distinguishable.

        :param filtered dict: Filtered results, mutated in place.
        :param res: Unused (rebound by the loop below).
        :param filter callable: str -> str | None renaming function.
        """
        previous = []
        for data_type, data_info in filtered.items():
            for res in data_info:
                new = filter(res["subtest"])
                if new is None:
                    continue
                if new in previous:
                    self.logger.warning(
                        f"Another metric which ends with `{new}` was already found. "
                        f"{res['subtest']} will not be simplified."
                    )
                    continue
                res["subtest"] = new
                previous.append(new)


# Module-level cache of MetricsStorage instances, keyed by (path, prefix).
_metrics = {}


def filtered_metrics(
    metadata,
    path,
    prefix,
    group_name="firefox",
    transformer=None,
    metrics=None,
    settings=False,
    exclude=None,
    split_by=None,
    simplify_names=False,
    # NOTE(review): mutable default argument (never mutated) — see the
    # method of the same name on MetricsStorage.
    simplify_exclude=["statistics"],
):
    """Returns standardized data extracted from the metadata instance.

    We're caching an instance of MetricsStorage per metrics/storage
    combination and compute the data only once when this function is called.

    :param metadata: Object providing `get_results()` and acting as the
        logger handed to MetricsStorage.
    :param path str: Output directory for processed data.
    :param prefix str: Prefix used by the output files.
    :param settings bool: When True, also return the per-suite settings.
    :return: Filtered standardized data, or a
        (results, settings) 2-tuple when `settings` is True.
    """
    key = path, prefix
    if key not in _metrics:
        storage = _metrics[key] = MetricsStorage(path, prefix, metadata)
        storage.set_results(metadata.get_results())
    else:
        storage = _metrics[key]

    results = storage.filtered_metrics(
        group_name=group_name,
        transformer=transformer,
        metrics=metrics,
        exclude=exclude,
        split_by=split_by,
        simplify_names=simplify_names,
        simplify_exclude=simplify_exclude,
    )

    # XXX returning two different types is a problem
    # in case settings is false, we should return None for it
    # and always return a 2-tuple
    if settings:
        return results, storage.settings
    return results