diff options
Diffstat (limited to 'tools/lint/perfdocs')
-rw-r--r-- | tools/lint/perfdocs/__init__.py | 13 | ||||
-rw-r--r-- | tools/lint/perfdocs/doc_helpers.py | 85 | ||||
-rw-r--r-- | tools/lint/perfdocs/framework_gatherers.py | 571 | ||||
-rw-r--r-- | tools/lint/perfdocs/gatherer.py | 156 | ||||
-rw-r--r-- | tools/lint/perfdocs/generator.py | 281 | ||||
-rw-r--r-- | tools/lint/perfdocs/logger.py | 95 | ||||
-rw-r--r-- | tools/lint/perfdocs/perfdocs.py | 95 | ||||
-rw-r--r-- | tools/lint/perfdocs/templates/index.rst | 86 | ||||
-rw-r--r-- | tools/lint/perfdocs/utils.py | 157 | ||||
-rw-r--r-- | tools/lint/perfdocs/verifier.py | 601 |
10 files changed, 2140 insertions, 0 deletions
diff --git a/tools/lint/perfdocs/__init__.py b/tools/lint/perfdocs/__init__.py new file mode 100644 index 0000000000..1194d38624 --- /dev/null +++ b/tools/lint/perfdocs/__init__.py @@ -0,0 +1,13 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +import os + +from perfdocs import perfdocs + +here = os.path.abspath(os.path.dirname(__file__)) +PERFDOCS_REQUIREMENTS_PATH = os.path.join(here, "requirements.txt") + + +def lint(paths, config, logger, fix=False, **lintargs): + return perfdocs.run_perfdocs(config, logger=logger, paths=paths, generate=fix) diff --git a/tools/lint/perfdocs/doc_helpers.py b/tools/lint/perfdocs/doc_helpers.py new file mode 100644 index 0000000000..709f190204 --- /dev/null +++ b/tools/lint/perfdocs/doc_helpers.py @@ -0,0 +1,85 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +class MismatchedRowLengthsException(Exception): + """ + This exception is thrown when there is a mismatch between the number of items in a row, + and the number of headers defined. + """ + + pass + + +class TableBuilder(object): + """ + Helper class for building tables. 
+ """ + + def __init__(self, title, widths, header_rows, headers, indent=0): + """ + :param title: str - Title of the table + :param widths: list of str - Widths of each column of the table + :param header_rows: int - Number of header rows + :param headers: 2D list of str - Headers + :param indent: int - Number of spaces to indent table + """ + if not isinstance(title, str): + raise TypeError("TableBuilder attribute title must be a string.") + if not isinstance(widths, list) or not isinstance(widths[0], int): + raise TypeError("TableBuilder attribute widths must be a list of integers.") + if not isinstance(header_rows, int): + raise TypeError("TableBuilder attribute header_rows must be an integer.") + if ( + not isinstance(headers, list) + or not isinstance(headers[0], list) + or not isinstance(headers[0][0], str) + ): + raise TypeError( + "TableBuilder attribute headers must be a two-dimensional list of strings." + ) + if not isinstance(indent, int): + raise TypeError("TableBuilder attribute indent must be an integer.") + + self.title = title + self.widths = widths + self.header_rows = header_rows + self.headers = headers + self.indent = " " * indent + self.table = "" + self._build_table() + + def _build_table(self): + if len(self.widths) != len(self.headers[0]): + raise MismatchedRowLengthsException( + "Number of table headers must match number of column widths." + ) + widths = " ".join(map(str, self.widths)) + self.table += ( + f"{self.indent}.. 
list-table:: **{self.title}**\n" + f"{self.indent} :widths: {widths}\n" + f"{self.indent} :header-rows: {self.header_rows}\n\n" + ) + self.add_rows(self.headers) + + def add_rows(self, rows): + if type(rows) != list or type(rows[0]) != list or type(rows[0][0]) != str: + raise TypeError("add_rows() requires a two-dimensional list of strings.") + for row in rows: + self.add_row(row) + + def add_row(self, values): + if len(values) != len(self.widths): + raise MismatchedRowLengthsException( + "Number of items in a row must must number of columns defined." + ) + for i, val in enumerate(values): + if i == 0: + self.table += f"{self.indent} * - **{val}**\n" + else: + self.table += f"{self.indent} - {val}\n" + + def finish_table(self): + self.table += "\n" + return self.table diff --git a/tools/lint/perfdocs/framework_gatherers.py b/tools/lint/perfdocs/framework_gatherers.py new file mode 100644 index 0000000000..4c2ca585ad --- /dev/null +++ b/tools/lint/perfdocs/framework_gatherers.py @@ -0,0 +1,571 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +import json +import os +import pathlib +import re + +from gecko_taskgraph.util.attributes import match_run_on_projects +from manifestparser import TestManifest +from mozperftest.script import ScriptInfo + +from perfdocs.doc_helpers import TableBuilder +from perfdocs.logger import PerfDocLogger +from perfdocs.utils import read_yaml + +logger = PerfDocLogger() + +BRANCHES = [ + "mozilla-central", + "autoland", + "mozilla-release", + "mozilla-beta", +] + +""" +This file is for framework specific gatherers since manifests +might be parsed differently in each of them. The gatherers +must implement the FrameworkGatherer class. +""" + + +class FrameworkGatherer(object): + """ + Abstract class for framework gatherers. 
+ """ + + def __init__(self, yaml_path, workspace_dir, taskgraph={}): + """ + Generic initialization for a framework gatherer. + """ + self.workspace_dir = workspace_dir + self._yaml_path = yaml_path + self._taskgraph = taskgraph + self._suite_list = {} + self._test_list = {} + self._descriptions = {} + self._manifest_path = "" + self._manifest = None + self.script_infos = {} + self._task_list = {} + self._task_match_pattern = re.compile(r"([\w\W]*/[pgo|opt]*)-([\w\W]*)") + + def get_task_match(self, task_name): + return re.search(self._task_match_pattern, task_name) + + def get_manifest_path(self): + """ + Returns the path to the manifest based on the + manifest entry in the frameworks YAML configuration + file. + + :return str: Path to the manifest. + """ + if self._manifest_path: + return self._manifest_path + + yaml_content = read_yaml(self._yaml_path) + self._manifest_path = pathlib.Path(self.workspace_dir, yaml_content["manifest"]) + return self._manifest_path + + def get_suite_list(self): + """ + Each framework gatherer must return a dictionary with + the following structure. Note that the test names must + be relative paths so that issues can be correctly issued + by the reviewbot. + + :return dict: A dictionary with the following structure: { + "suite_name": [ + 'testing/raptor/test1', + 'testing/raptor/test2' + ] + } + """ + raise NotImplementedError + + def _build_section_with_header(self, title, content, header_type=None): + """ + Adds a section to the documentation with the title as the type mentioned + and paragraph as content mentioned. + :param title: title of the section + :param content: content of section paragraph + :param header_type: type of the title heading + """ + heading_map = {"H2": "*", "H3": "=", "H4": "-", "H5": "^"} + return [title, heading_map.get(header_type, "^") * len(title), content, ""] + + +class RaptorGatherer(FrameworkGatherer): + """ + Gatherer for the Raptor framework. 
+ """ + + def get_suite_list(self): + """ + Returns a dictionary containing a mapping from suites + to the tests they contain. + + :return dict: A dictionary with the following structure: { + "suite_name": [ + 'testing/raptor/test1', + 'testing/raptor/test2' + ] + } + """ + if self._suite_list: + return self._suite_list + + manifest_path = self.get_manifest_path() + + # Get the tests from the manifest + test_manifest = TestManifest([str(manifest_path)], strict=False) + test_list = test_manifest.active_tests(exists=False, disabled=False) + + # Parse the tests into the expected dictionary + for test in test_list: + # Get the top-level suite + s = os.path.basename(test["here"]) + if s not in self._suite_list: + self._suite_list[s] = [] + + # Get the individual test + fpath = re.sub(".*testing", "testing", test["manifest"]) + + if fpath not in self._suite_list[s]: + self._suite_list[s].append(fpath) + + return self._suite_list + + def _get_ci_tasks(self): + for task in self._taskgraph.keys(): + if type(self._taskgraph[task]) == dict: + command = self._taskgraph[task]["task"]["payload"].get("command", []) + run_on_projects = self._taskgraph[task]["attributes"]["run_on_projects"] + else: + command = self._taskgraph[task].task["payload"].get("command", []) + run_on_projects = self._taskgraph[task].attributes["run_on_projects"] + + test_match = re.search(r"[\s']--test[\s=](.+?)[\s']", str(command)) + task_match = self.get_task_match(task) + if test_match and task_match: + test = test_match.group(1) + platform = task_match.group(1) + test_name = task_match.group(2) + + item = {"test_name": test_name, "run_on_projects": run_on_projects} + self._task_list.setdefault(test, {}).setdefault(platform, []).append( + item + ) + + def _get_subtests_from_ini(self, manifest_path, suite_name): + """ + Returns a list of (sub)tests from an ini file containing the test definitions. 
+ + :param str manifest_path: path to the ini file + :return list: the list of the tests + """ + desc_exclusion = ["here", "manifest_relpath", "path", "relpath"] + test_manifest = TestManifest([str(manifest_path)], strict=False) + test_list = test_manifest.active_tests(exists=False, disabled=False) + subtests = {} + for subtest in test_list: + subtests[subtest["name"]] = subtest["manifest"] + + description = {} + for key, value in subtest.items(): + if key not in desc_exclusion: + description[key] = value + + # Prepare alerting metrics for verification + description["metrics"] = [ + metric.strip() + for metric in description.get("alert_on", "").split(",") + if metric.strip() != "" + ] + + subtests[subtest["name"]] = description + self._descriptions.setdefault(suite_name, []).append(description) + + self._descriptions[suite_name].sort(key=lambda item: item["name"]) + + return subtests + + def get_test_list(self): + """ + Returns a dictionary containing the tests in every suite ini file. + + :return dict: A dictionary with the following structure: { + "suite_name": { + 'raptor_test1', + 'raptor_test2' + }, + } + """ + if self._test_list: + return self._test_list + + suite_list = self.get_suite_list() + + # Iterate over each manifest path from suite_list[suite_name] + # and place the subtests into self._test_list under the same key + for suite_name, manifest_paths in suite_list.items(): + if not self._test_list.get(suite_name): + self._test_list[suite_name] = {} + for manifest_path in manifest_paths: + subtest_list = self._get_subtests_from_ini(manifest_path, suite_name) + self._test_list[suite_name].update(subtest_list) + + self._get_ci_tasks() + + return self._test_list + + def build_test_description(self, title, test_description="", suite_name=""): + matcher = [] + browsers = [ + "firefox", + "chrome", + "chromium", + "refbrow", + "fennec68", + "geckoview", + "fenix", + ] + test_name = [f"{title}-{browser}" for browser in browsers] + test_name.append(title) + + for 
suite, val in self._descriptions.items(): + for test in val: + if test["name"] in test_name and suite_name == suite: + matcher.append(test) + + if len(matcher) == 0: + logger.critical( + "No tests exist for the following name " + "(obtained from config.yml): {}".format(title) + ) + raise Exception( + "No tests exist for the following name " + "(obtained from config.yml): {}".format(title) + ) + + result = f".. dropdown:: {title}\n" + result += f" :class-container: anchor-id-{title}-{suite_name[0]}\n\n" + + for idx, description in enumerate(matcher): + if description["name"] != title: + result += f" {idx+1}. **{description['name']}**\n\n" + if "owner" in description.keys(): + result += f" **Owner**: {description['owner']}\n\n" + + for key in sorted(description.keys()): + if key in ["owner", "name", "manifest", "metrics"]: + continue + sub_title = key.replace("_", " ") + if key == "test_url": + if "<" in description[key] or ">" in description[key]: + description[key] = description[key].replace("<", "\<") + description[key] = description[key].replace(">", "\>") + result += f" * **{sub_title}**: `<{description[key]}>`__\n" + elif key == "secondary_url": + result += f" * **{sub_title}**: `<{description[key]}>`__\n" + elif key in ["playback_pageset_manifest"]: + result += ( + f" * **{sub_title}**: " + f"{description[key].replace('{subtest}', description['name'])}\n" + ) + else: + if "\n" in description[key]: + description[key] = description[key].replace("\n", " ") + result += f" * **{sub_title}**: {description[key]}\n" + + if self._task_list.get(title, []): + result += " * **Test Task**:\n\n" + for platform in sorted(self._task_list[title]): + self._task_list[title][platform].sort(key=lambda x: x["test_name"]) + + table = TableBuilder( + title=platform, + widths=[30] + [15 for x in BRANCHES], + header_rows=1, + headers=[["Test Name"] + BRANCHES], + indent=3, + ) + + for task in self._task_list[title][platform]: + values = [task["test_name"]] + values += [ + "\u2705" + if 
match_run_on_projects(x, task["run_on_projects"]) + else "\u274C" + for x in BRANCHES + ] + table.add_row(values) + result += f"{table.finish_table()}\n" + + return [result] + + def build_suite_section(self, title, content): + return self._build_section_with_header( + title.capitalize(), content, header_type="H4" + ) + + +class MozperftestGatherer(FrameworkGatherer): + """ + Gatherer for the Mozperftest framework. + """ + + def get_test_list(self): + """ + Returns a dictionary containing the tests that are in perftest.toml manifest. + + :return dict: A dictionary with the following structure: { + "suite_name": { + 'perftest_test1', + 'perftest_test2', + }, + } + """ + for path in list(pathlib.Path(self.workspace_dir).rglob("perftest.toml")): + if "obj-" in str(path): + continue + suite_name = str(path.parent).replace(str(self.workspace_dir), "") + + # If the workspace dir doesn't end with a forward-slash, + # the substitution above won't work completely + if suite_name.startswith("/") or suite_name.startswith("\\"): + suite_name = suite_name[1:] + + # We have to add new paths to the logger as we search + # because mozperftest tests exist in multiple places in-tree + PerfDocLogger.PATHS.append(suite_name) + + # Get the tests from perftest.toml + test_manifest = TestManifest([str(path)], strict=False) + test_list = test_manifest.active_tests(exists=False, disabled=False) + for test in test_list: + si = ScriptInfo(test["path"]) + self.script_infos[si["name"]] = si + self._test_list.setdefault(suite_name.replace("\\", "/"), {}).update( + {si["name"]: {"path": str(path)}} + ) + + return self._test_list + + def build_test_description(self, title, test_description="", suite_name=""): + return [str(self.script_infos[title])] + + def build_suite_section(self, title, content): + return self._build_section_with_header(title, content, header_type="H4") + + +class TalosGatherer(FrameworkGatherer): + def _get_ci_tasks(self): + with open( + pathlib.Path(self.workspace_dir, 
"testing", "talos", "talos.json") + ) as f: + config_suites = json.load(f)["suites"] + + for task_name in self._taskgraph.keys(): + task = self._taskgraph[task_name] + + if type(task) == dict: + is_talos = task["task"]["extra"].get("suite", []) + command = task["task"]["payload"].get("command", []) + run_on_projects = task["attributes"]["run_on_projects"] + else: + is_talos = task.task["extra"].get("suite", []) + command = task.task["payload"].get("command", []) + run_on_projects = task.attributes["run_on_projects"] + + suite_match = re.search(r"[\s']--suite[\s=](.+?)[\s']", str(command)) + task_match = self.get_task_match(task_name) + if "talos" == is_talos and task_match: + suite = suite_match.group(1) + platform = task_match.group(1) + test_name = task_match.group(2) + item = {"test_name": test_name, "run_on_projects": run_on_projects} + + for test in config_suites[suite]["tests"]: + self._task_list.setdefault(test, {}).setdefault( + platform, [] + ).append(item) + + def get_test_list(self): + from talos import test as talos_test + + test_lists = talos_test.test_dict() + mod = __import__("talos.test", fromlist=test_lists) + + suite_name = "Talos Tests" + + for test in test_lists: + self._test_list.setdefault(suite_name, {}).update({test: {}}) + + klass = getattr(mod, test) + self._descriptions.setdefault(test, klass.__dict__) + + self._get_ci_tasks() + + return self._test_list + + def build_test_description(self, title, test_description="", suite_name=""): + result = f".. dropdown:: {title}\n" + result += f" :class-container: anchor-id-{title}\n\n" + + yml_descriptions = [s.strip() for s in test_description.split("- ") if s] + for description in yml_descriptions: + if "Example Data" in description: + # Example Data for using code block + example_list = [s.strip() for s in description.split("* ")] + result += f" * {example_list[0]}\n" + result += "\n .. 
code-block::\n\n" + for example in example_list[1:]: + result += f" {example}\n" + result += "\n" + + elif " * " in description: + # Sub List + sub_list = [s.strip() for s in description.split(" * ")] + result += f" * {sub_list[0]}\n" + for sub in sub_list[1:]: + result += f" * {sub}\n" + + else: + # General List + result += f" * {description}\n" + + if title in self._descriptions: + for key in sorted(self._descriptions[title]): + if key.startswith("__") and key.endswith("__"): + continue + elif key == "filters": + continue + + # On windows, we get the paths in the wrong style + value = self._descriptions[title][key] + if isinstance(value, dict): + for k, v in value.items(): + if isinstance(v, str) and "\\" in v: + value[k] = str(v).replace("\\", r"/") + result += r" * " + key + r": " + str(value) + r"\n" + + # Command + result += " * Command\n\n" + result += " .. code-block::\n\n" + result += f" ./mach talos-test -a {title}\n\n" + + if self._task_list.get(title, []): + result += " * **Test Task**:\n\n" + for platform in sorted(self._task_list[title]): + self._task_list[title][platform].sort(key=lambda x: x["test_name"]) + + table = TableBuilder( + title=platform, + widths=[30] + [15 for x in BRANCHES], + header_rows=1, + headers=[["Test Name"] + BRANCHES], + indent=3, + ) + + for task in self._task_list[title][platform]: + values = [task["test_name"]] + values += [ + "\u2705" + if match_run_on_projects(x, task["run_on_projects"]) + else "\u274C" + for x in BRANCHES + ] + table.add_row(values) + result += f"{table.finish_table()}\n" + + return [result] + + def build_suite_section(self, title, content): + return self._build_section_with_header(title, content, header_type="H2") + + +class AwsyGatherer(FrameworkGatherer): + """ + Gatherer for the Awsy framework. 
+ """ + + def _generate_ci_tasks(self): + for task_name in self._taskgraph.keys(): + task = self._taskgraph[task_name] + + if type(task) == dict: + awsy_test = task["task"]["extra"].get("suite", []) + run_on_projects = task["attributes"]["run_on_projects"] + else: + awsy_test = task.task["extra"].get("suite", []) + run_on_projects = task.attributes["run_on_projects"] + + task_match = self.get_task_match(task_name) + + if "awsy" in awsy_test and task_match: + platform = task_match.group(1) + test_name = task_match.group(2) + item = {"test_name": test_name, "run_on_projects": run_on_projects} + self._task_list.setdefault(platform, []).append(item) + + def get_suite_list(self): + self._suite_list = {"Awsy tests": ["tp6", "base", "dmd", "tp5"]} + return self._suite_list + + def get_test_list(self): + self._generate_ci_tasks() + return { + "Awsy tests": { + "tp6": {}, + "base": {}, + "dmd": {}, + "tp5": {}, + } + } + + def build_suite_section(self, title, content): + return self._build_section_with_header( + title.capitalize(), content, header_type="H4" + ) + + def build_test_description(self, title, test_description="", suite_name=""): + dropdown_suite_name = suite_name.replace(" ", "-") + result = f".. 
dropdown:: {title} ({test_description})\n" + result += f" :class-container: anchor-id-{title}-{dropdown_suite_name}\n\n" + + awsy_data = read_yaml(self._yaml_path)["suites"]["Awsy tests"] + if "owner" in awsy_data.keys(): + result += f" **Owner**: {awsy_data['owner']}\n\n" + result += " * **Test Task**:\n" + + # tp5 tests are represented by awsy-e10s test names + # while the others have their title in test names + search_tag = "awsy-e10s" if title == "tp5" else title + for platform in sorted(self._task_list.keys()): + result += f" * {platform}\n" + for test_dict in sorted( + self._task_list[platform], key=lambda d: d["test_name"] + ): + if search_tag in test_dict["test_name"]: + run_on_project = ": " + ( + ", ".join(test_dict["run_on_projects"]) + if test_dict["run_on_projects"] + else "None" + ) + result += ( + f" * {test_dict['test_name']}{run_on_project}\n" + ) + result += "\n" + + return [result] + + +class StaticGatherer(FrameworkGatherer): + """ + A noop gatherer for frameworks with static-only documentation. + """ + + pass diff --git a/tools/lint/perfdocs/gatherer.py b/tools/lint/perfdocs/gatherer.py new file mode 100644 index 0000000000..828c2f7f2b --- /dev/null +++ b/tools/lint/perfdocs/gatherer.py @@ -0,0 +1,156 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +import os +import pathlib + +from perfdocs.framework_gatherers import ( + AwsyGatherer, + MozperftestGatherer, + RaptorGatherer, + StaticGatherer, + TalosGatherer, +) +from perfdocs.logger import PerfDocLogger +from perfdocs.utils import read_yaml + +logger = PerfDocLogger() + +# TODO: Implement decorator/searcher to find the classes. 
+frameworks = { + "raptor": RaptorGatherer, + "mozperftest": MozperftestGatherer, + "talos": TalosGatherer, + "awsy": AwsyGatherer, +} + +# List of file types allowed to be used as static files +ALLOWED_STATIC_FILETYPES = ("rst", "png") + + +class Gatherer(object): + """ + Gatherer produces the tree of the perfdoc's entries found + and can obtain manifest-based test lists. Used by the Verifier. + """ + + def __init__(self, workspace_dir, taskgraph=None): + """ + Initialize the Gatherer. + + :param str workspace_dir: Path to the gecko checkout. + """ + self.workspace_dir = workspace_dir + self.taskgraph = taskgraph + self._perfdocs_tree = [] + self._test_list = [] + self.framework_gatherers = {} + + @property + def perfdocs_tree(self): + """ + Returns the perfdocs_tree, and computes it + if it doesn't exist. + + :return dict: The perfdocs tree containing all + framework perfdoc entries. See `fetch_perfdocs_tree` + for information on the data structure. + """ + if self._perfdocs_tree: + return self._perfdocs_tree + else: + self.fetch_perfdocs_tree() + return self._perfdocs_tree + + def fetch_perfdocs_tree(self): + """ + Creates the perfdocs tree with the following structure: + [ + { + "path": Path to the perfdocs directory. + "yml": Name of the configuration YAML file. + "rst": Name of the RST file. + "static": Name of the static file. + }, ... + ] + + This method doesn't return anything. The result can be found in + the perfdocs_tree attribute. 
+ """ + exclude_dir = [ + str(pathlib.Path(self.workspace_dir, ".hg")), + str(pathlib.Path("tools", "lint")), + str(pathlib.Path("testing", "perfdocs")), + ] + + for path in pathlib.Path(self.workspace_dir).rglob("perfdocs"): + if any(d in str(path.resolve()) for d in exclude_dir): + continue + files = [f for f in os.listdir(path)] + matched = {"path": str(path), "yml": "", "rst": "", "static": []} + + for file in files: + # Add the yml/rst/static file to its key if re finds the searched file + if file == "config.yml" or file == "config.yaml": + matched["yml"] = file + elif file == "index.rst": + matched["rst"] = file + elif file.split(".")[-1] in ALLOWED_STATIC_FILETYPES: + matched["static"].append(file) + + # Append to structdocs if all the searched files were found + if all(val for val in matched.values() if not type(val) == list): + self._perfdocs_tree.append(matched) + + logger.log( + "Found {} perfdocs directories in {}".format( + len(self._perfdocs_tree), + [d["path"] for d in self._perfdocs_tree], + ) + ) + + def get_test_list(self, sdt_entry): + """ + Use a perfdocs_tree entry to find the test list for + the framework that was found. 
+ + :return: A framework info dictionary with fields: { + 'yml_path': Path to YAML, + 'yml_content': Content of YAML, + 'name': Name of framework, + 'test_list': Test list found for the framework + } + """ + + # If it was computed before, return it + yaml_path = pathlib.Path(sdt_entry["path"], sdt_entry["yml"]) + for entry in self._test_list: + if entry["yml_path"] == yaml_path: + return entry + + # Set up framework entry with meta data + yaml_content = read_yaml(yaml_path) + framework = { + "yml_content": yaml_content, + "yml_path": yaml_path, + "name": yaml_content["name"], + "test_list": {}, + } + + if yaml_content["static-only"]: + framework_gatherer_cls = StaticGatherer + else: + framework_gatherer_cls = frameworks[framework["name"]] + + # Get and then store the frameworks tests + framework_gatherer = self.framework_gatherers[ + framework["name"] + ] = framework_gatherer_cls( + framework["yml_path"], self.workspace_dir, self.taskgraph + ) + + if not yaml_content["static-only"]: + framework["test_list"] = framework_gatherer.get_test_list() + + self._test_list.append(framework) + return framework diff --git a/tools/lint/perfdocs/generator.py b/tools/lint/perfdocs/generator.py new file mode 100644 index 0000000000..3f3a0acefa --- /dev/null +++ b/tools/lint/perfdocs/generator.py @@ -0,0 +1,281 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+import pathlib +import re +import shutil +import tempfile + +from perfdocs.logger import PerfDocLogger +from perfdocs.utils import ( + ON_TRY, + are_dirs_equal, + get_changed_files, + read_file, + read_yaml, + save_file, +) + +logger = PerfDocLogger() + + +class Generator(object): + """ + After each perfdocs directory was validated, the generator uses the templates + for each framework, fills them with the test descriptions in config and saves + the perfdocs in the form index.rst as index file and suite_name.rst for + each suite of tests in the framework. + """ + + def __init__(self, verifier, workspace, generate=False): + """ + Initialize the Generator. + + :param verifier: Verifier object. It should not be a fresh Verifier object, + but an initialized one with validate_tree() method already called + :param workspace: Path to the top-level checkout directory. + :param generate: Flag for generating the documentation + """ + self._workspace = workspace + if not self._workspace: + raise Exception("PerfDocs Generator requires a workspace directory.") + # Template documents without added information reside here + self.templates_path = pathlib.Path( + self._workspace, "tools", "lint", "perfdocs", "templates" + ) + self.perfdocs_path = pathlib.Path( + self._workspace, "testing", "perfdocs", "generated" + ) + + self._generate = generate + self._verifier = verifier + self._perfdocs_tree = self._verifier._gatherer.perfdocs_tree + + def build_perfdocs_from_tree(self): + """ + Builds up a document for each framework that was found. + + :return dict: A dictionary containing a mapping from each framework + to the document that was built for it, i.e: + { + framework_name: framework_document, + ... + } + """ + + # Using the verified `perfdocs_tree`, build up the documentation. 
+ frameworks_info = {} + for framework in self._perfdocs_tree: + yaml_content = read_yaml(pathlib.Path(framework["path"], framework["yml"])) + rst_content = read_file( + pathlib.Path(framework["path"], framework["rst"]), stringify=True + ) + + # Gather all tests and descriptions and format them into + # documentation content + documentation = [] + suites = yaml_content["suites"] + for suite_name in sorted(suites.keys()): + suite_info = suites[suite_name] + + # Add the suite section + documentation.extend( + self._verifier._gatherer.framework_gatherers[ + yaml_content["name"] + ].build_suite_section(suite_name, suite_info["description"]) + ) + + tests = suite_info.get("tests", {}) + for test_name in sorted(tests.keys()): + gatherer = self._verifier._gatherer.framework_gatherers[ + yaml_content["name"] + ] + test_description = gatherer.build_test_description( + test_name, tests[test_name], suite_name + ) + documentation.extend(test_description) + documentation.append("") + + # Insert documentation into `.rst` file + framework_rst = re.sub( + r"{documentation}", "\n".join(documentation), rst_content + ) + frameworks_info[yaml_content["name"]] = { + "dynamic": framework_rst, + "static": [], + } + + # For static `.rst` file + for static_file in framework["static"]: + if static_file.endswith("rst"): + frameworks_info[yaml_content["name"]]["static"].append( + { + "file": static_file, + "content": read_file( + pathlib.Path(framework["path"], static_file), + stringify=True, + ), + } + ) + else: + frameworks_info[yaml_content["name"]]["static"].append( + { + "file": static_file, + "content": pathlib.Path(framework["path"], static_file), + } + ) + + return frameworks_info + + def _create_temp_dir(self): + """ + Create a temp directory as preparation of saving the documentation tree. 
+ :return: str the location of perfdocs_tmpdir + """ + # Build the directory that will contain the final result (a tmp dir + # that will be moved to the final location afterwards) + try: + tmpdir = pathlib.Path(tempfile.mkdtemp()) + perfdocs_tmpdir = pathlib.Path(tmpdir, "generated") + perfdocs_tmpdir.mkdir(parents=True, exist_ok=True) + perfdocs_tmpdir.chmod(0o766) + except OSError as e: + logger.critical("Error creating temp file: {}".format(e)) + + if perfdocs_tmpdir.is_dir(): + return perfdocs_tmpdir + return False + + def _create_perfdocs(self): + """ + Creates the perfdocs documentation. + :return: str path of the temp dir it is saved in + """ + # All directories that are kept in the perfdocs tree are valid, + # so use it to build up the documentation. + framework_docs = self.build_perfdocs_from_tree() + perfdocs_tmpdir = self._create_temp_dir() + + # Save the documentation files + frameworks = [] + for framework_name in sorted(framework_docs.keys()): + frameworks.append(framework_name) + save_file( + framework_docs[framework_name]["dynamic"], + pathlib.Path(perfdocs_tmpdir, framework_name), + ) + + for static_name in framework_docs[framework_name]["static"]: + if static_name["file"].endswith(".rst"): + # XXX Replace this with a shutil.copy call (like below) + save_file( + static_name["content"], + pathlib.Path( + perfdocs_tmpdir, static_name["file"].split(".")[0] + ), + ) + else: + shutil.copy( + static_name["content"], + pathlib.Path(perfdocs_tmpdir, static_name["file"]), + ) + + # Get the main page and add the framework links to it + mainpage = read_file( + pathlib.Path(self.templates_path, "index.rst"), stringify=True + ) + + fmt_frameworks = "\n".join([" * :doc:`%s`" % name for name in frameworks]) + fmt_toctree = "\n".join([" %s" % name for name in frameworks]) + + fmt_mainpage = re.sub(r"{toctree_documentation}", fmt_toctree, mainpage) + fmt_mainpage = re.sub(r"{test_documentation}", fmt_frameworks, fmt_mainpage) + + save_file(fmt_mainpage, 
pathlib.Path(perfdocs_tmpdir, "index")) + + return perfdocs_tmpdir + + def _save_perfdocs(self, perfdocs_tmpdir): + """ + Copies the perfdocs tree after it was saved into the perfdocs_tmpdir + :param perfdocs_tmpdir: str location of the temp dir where the + perfdocs was saved + """ + # Remove the old docs and copy the new version there without + # checking if they need to be regenerated. + logger.log("Regenerating perfdocs...") + + if self.perfdocs_path.exists(): + shutil.rmtree(str(self.perfdocs_path)) + + try: + saved = shutil.copytree(str(perfdocs_tmpdir), str(self.perfdocs_path)) + if saved: + logger.log( + "Documentation saved to {}/".format( + re.sub(".*testing", "testing", str(self.perfdocs_path)) + ) + ) + except Exception as e: + logger.critical( + "There was an error while saving the documentation: {}".format(e) + ) + + def generate_perfdocs(self): + """ + Generate the performance documentation. + + If `self._generate` is True, then the documentation will be regenerated + without any checks. Otherwise, if it is False, the new documentation will be + prepare and compare with the existing documentation to determine if + it should be regenerated. + + :return bool: True/False - For True, if `self._generate` is True, then the + docs were regenerated. If `self._generate` is False, then True will mean + that the docs should be regenerated, and False means that they do not + need to be regenerated. 
+ """ + + def get_possibly_changed_files(): + """ + Returns files that might have been modified + (used to output a linter warning for regeneration) + :return: list - files that might have been modified + """ + # Returns files that might have been modified + # (used to output a linter warning for regeneration) + files = [] + for entry in self._perfdocs_tree: + files.extend( + [ + pathlib.Path(entry["path"], entry["yml"]), + pathlib.Path(entry["path"], entry["rst"]), + ] + ) + return files + + # Throw a warning if there's no need for generating + if not self.perfdocs_path.exists() and not self._generate: + # If they don't exist and we are not generating, then throw + # a linting error and exit. + logger.warning( + "PerfDocs need to be regenerated.", files=get_possibly_changed_files() + ) + return True + + perfdocs_tmpdir = self._create_perfdocs() + if self._generate: + self._save_perfdocs(perfdocs_tmpdir) + else: + # If we are not generating, then at least check if they + # should be regenerated by comparing the directories. + if not are_dirs_equal(perfdocs_tmpdir, self.perfdocs_path): + logger.warning( + "PerfDocs are outdated, run ./mach lint -l perfdocs --fix .` " + + "to update them. You can also apply the " + + f"{'perfdocs.diff' if ON_TRY else 'diff.txt'} patch file " + + f"{'produced from this reviewbot test ' if ON_TRY else ''}" + + "to fix the issue.", + files=get_changed_files(self._workspace), + restricted=False, + ) diff --git a/tools/lint/perfdocs/logger.py b/tools/lint/perfdocs/logger.py new file mode 100644 index 0000000000..ba02532c32 --- /dev/null +++ b/tools/lint/perfdocs/logger.py @@ -0,0 +1,95 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +import pathlib + + +class PerfDocLogger(object): + """ + Logger for the PerfDoc tooling. 
Handles the warnings by outputting + them into through the StructuredLogger provided by lint. + """ + + TOP_DIR = "" + PATHS = [] + LOGGER = None + FAILED = False + + def __init__(self): + """Initializes the PerfDocLogger.""" + + # Set up class attributes for all logger instances + if not PerfDocLogger.LOGGER: + raise Exception( + "Missing linting LOGGER instance for PerfDocLogger initialization" + ) + if not PerfDocLogger.PATHS: + raise Exception("Missing PATHS for PerfDocLogger initialization") + self.logger = PerfDocLogger.LOGGER + + def log(self, msg): + """ + Log an info message. + + :param str msg: Message to log. + """ + self.logger.info(msg) + + def warning(self, msg, files, restricted=True): + """ + Logs a validation warning message. The warning message is + used as the error message that is output in the reviewbot. + + :param str msg: Message to log, it's also used as the error message + for the issue that is output by the reviewbot. + :param list/str files: The file(s) that this warning is about. + :param boolean restricted: If the param is False, the lint error can be used anywhere. 
+ """ + if type(files) != list: + files = [files] + + if len(files) == 0: + self.logger.info("No file was provided for the warning") + self.logger.lint_error( + message=msg, + lineno=0, + column=None, + path=None, + linter="perfdocs", + rule="Flawless performance docs (unknown file)", + ) + + PerfDocLogger.FAILED = True + return + + # Add a reviewbot error for each file that is given + for file in files: + # Get a relative path (reviewbot can't handle absolute paths) + fpath = str(file).replace(str(PerfDocLogger.TOP_DIR), "") + + # Filter out any issues that do not relate to the paths + # that are being linted + for path in PerfDocLogger.PATHS: + if restricted and str(path) not in str(file): + continue + + # Output error entry + self.logger.lint_error( + message=msg, + lineno=0, + column=None, + path=str(pathlib.PurePosixPath(fpath)), + linter="perfdocs", + rule="Flawless performance docs.", + ) + + PerfDocLogger.FAILED = True + break + + def critical(self, msg): + """ + Log a critical message. + + :param str msg: Message to log. + """ + self.logger.critical(msg) diff --git a/tools/lint/perfdocs/perfdocs.py b/tools/lint/perfdocs/perfdocs.py new file mode 100644 index 0000000000..b41edb1979 --- /dev/null +++ b/tools/lint/perfdocs/perfdocs.py @@ -0,0 +1,95 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +import os +import pathlib + + +def run_perfdocs(config, logger=None, paths=None, generate=True): + """ + Build up performance testing documentation dynamically by combining + text data from YAML files that reside in `perfdoc` folders + across the `testing` directory. Each directory is expected to have + an `index.rst` file along with `config.yml` YAMLs defining what needs + to be added to the documentation. 
+ + The YAML must also define the name of the "framework" that should be + used in the main index.rst for the performance testing documentation. + + The testing documentation list will be ordered alphabetically once + it's produced (to avoid unwanted shifts because of unordered dicts + and path searching). + + Note that the suite name headings will be given the H4 (---) style so it + is suggested that you use H3 (===) style as the heading for your + test section. H5 will be used for individual tests within each + suite. + + Usage for verification: "./mach lint -l perfdocs ." + Usage for generation: "./mach lint -l perfdocs --fix ." + + For validation, see the Verifier class for a description of how + it works. + + The run will fail if the valid result from validate_tree is not + False, implying some warning/problem was logged. + + :param dict config: The configuration given by mozlint. + :param StructuredLogger logger: The StructuredLogger instance to be used to + output the linting warnings/errors. + :param list paths: The paths that are being tested. Used to filter + out errors from files outside of these paths. + :param bool generate: If true, the docs will be (re)generated. 
+ """ + from perfdocs.logger import PerfDocLogger + + if not os.environ.get("WORKSPACE", None): + floc = pathlib.Path(__file__).absolute() + top_dir = pathlib.Path(str(floc).split("tools")[0]).resolve() + else: + top_dir = pathlib.Path(os.environ.get("WORKSPACE")).resolve() + + PerfDocLogger.LOGGER = logger + PerfDocLogger.TOP_DIR = top_dir + + # Convert all the paths to relative ones + target_dir = [pathlib.Path(path) for path in paths] + rel_paths = [] + for path in target_dir: + try: + rel_paths.append(path.relative_to(top_dir)) + except ValueError: + rel_paths.append(path) + + PerfDocLogger.PATHS = rel_paths + + for path in target_dir: + if not path.exists(): + raise Exception("Cannot locate directory at %s" % str(path)) + + decision_task_id = os.environ.get("DECISION_TASK_ID", None) + if decision_task_id: + from taskgraph.util.taskcluster import get_artifact + + task_graph = get_artifact(decision_task_id, "public/full-task-graph.json") + else: + from tryselect.tasks import generate_tasks + + task_graph = generate_tasks( + params=None, full=True, disable_target_task_filter=True + ).tasks + + # Late import because logger isn't defined until later + from perfdocs.generator import Generator + from perfdocs.verifier import Verifier + + # Run the verifier first + verifier = Verifier(top_dir, task_graph) + verifier.validate_tree() + + if not PerfDocLogger.FAILED: + # Even if the tree is valid, we need to check if the documentation + # needs to be regenerated, and if it does, we throw a linting error. + # `generate` dictates whether or not the documentation is generated. + generator = Generator(verifier, generate=generate, workspace=top_dir) + generator.generate_perfdocs() diff --git a/tools/lint/perfdocs/templates/index.rst b/tools/lint/perfdocs/templates/index.rst new file mode 100644 index 0000000000..d2d82f6328 --- /dev/null +++ b/tools/lint/perfdocs/templates/index.rst @@ -0,0 +1,86 @@ +################### +Performance Testing +################### + +.. 
toctree:: + :maxdepth: 2 + :hidden: + :glob: + +{toctree_documentation} + +Performance tests are designed to catch performance regressions before they reach our +end users. At this time, there is no unified approach for these types of tests, +but `mozperftest </testing/perfdocs/mozperftest.html>`_ aims to provide this in the future. + +For more detailed information about each test suite and project, see their documentation: + +{test_documentation} + + +Here are the active PerfTest components/modules and their respective owners: + + * AWFY (Are We Fast Yet) + - Owner: Beatrice A. + - Description: A public dashboard comparing Firefox and Chrome performance metrics + * AWSY (Are We Slim Yet) + - Owner: Alexandru I. + - Description: Project that tracks memory usage across builds + * Raptor + - Owner: Sparky + - Co-owner: Kash + - Description: Test harness that uses Browsertime (based on webdriver) as the underlying engine to run performance tests + * CondProf (Conditioned profiles) + - Owner: Sparky + - Co-owner: Jmaher + - Description: Provides tooling to build, and obtain profiles that are preconditioned in some way. + * fxrecord + - Owner: Sparky + - Co-owners: Kash, Andrej + - Description: Tool for measuring startup performance for Firefox Desktop + * Infrastructure + - Owner: Sparky + - Co-owners: Kash, Andrej + - Description: All things involving: TaskCluster, Youtube Playback, Bitbar, Mobile Configs, etc... + * Mozperftest + - Owner: Sparky + - Co-owners: Kash, Andrej + - Description: Testing framework used to run performance tests + * Mozperftest Tools + - Owner: Sparky + - Co-owner: Alexandru I. + - Description: Various tools used by performance testing team + * Mozproxy + - Owner: Sparky + - Co-owner: Kash + - Description: An http proxy used to run tests against third-party websites in a reliable and reproducible way + * PerfCompare + - Owner: Carla S. + - Co-owner: Beatrice A. 
+ - Description: Performance comparison tool used to compare performance of different commits within a repository + * PerfDocs + - Owner: Sparky + - Co-owner: Alexandru I. + - Description: Automatically generated performance test engineering documentation + * PerfHerder + - Owner: Beatrice A. + - Co-owner: Andra A. + - Description: The framework used by the performance sheriffs to find performance regressions and for storing and visualizing our performance data. + * Performance Sheriffing + - Owner: Alexandru I. + - Co-owner: Andra A. + - Description: Performance sheriffs are responsible for finding commits that cause performance regressions and getting fixes from devs or backing out the changes + * Talos + - Owner: Sparky + - Co-owner: Andrej + - Description: Testing framework used to run Firefox-specific performance tests + * WebPageTest + - Owner: Andrej + - Co-owner: Sparky + - Description: A test running in the mozperftest framework used as a third party performance benchmark + +You can additionally reach out to our team on +the `#perftest <https://matrix.to/#/#perftest:mozilla.org>`_ channel on Matrix. + +For more information about the performance testing team, +`visit the wiki page <https://wiki.mozilla.org/TestEngineering/Performance>`_. diff --git a/tools/lint/perfdocs/utils.py b/tools/lint/perfdocs/utils.py new file mode 100644 index 0000000000..1ba7daeb52 --- /dev/null +++ b/tools/lint/perfdocs/utils.py @@ -0,0 +1,157 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +import difflib +import filecmp +import os +import pathlib + +import yaml +from mozversioncontrol import get_repository_object + +from perfdocs.logger import PerfDocLogger + +logger = PerfDocLogger() + +ON_TRY = "MOZ_AUTOMATION" in os.environ + + +def save_file(file_content, path, extension="rst"): + """ + Saves data into a file. 
+ + :param str path: Location and name of the file being saved + (without an extension). + :param str data: Content to write into the file. + :param str extension: Extension to save the file as. + """ + new_file = pathlib.Path("{}.{}".format(str(path), extension)) + with new_file.open("wb") as f: + f.write(file_content.encode("utf-8")) + + +def read_file(path, stringify=False): + """ + Opens a file and returns its contents. + + :param str path: Path to the file. + :return list: List containing the lines in the file. + """ + with path.open(encoding="utf-8") as f: + return f.read() if stringify else f.readlines() + + +def read_yaml(yaml_path): + """ + Opens a YAML file and returns the contents. + + :param str yaml_path: Path to the YAML to open. + :return dict: Dictionary containing the YAML content. + """ + contents = {} + try: + with yaml_path.open(encoding="utf-8") as f: + contents = yaml.safe_load(f) + except Exception as e: + logger.warning( + "Error opening file {}: {}".format(str(yaml_path), str(e)), str(yaml_path) + ) + + return contents + + +def are_dirs_equal(dir_1, dir_2): + """ + Compare two directories to see if they are equal. Files in each + directory are assumed to be equal if their names and contents + are equal. + + :param dir_1: First directory path + :param dir_2: Second directory path + :return: True if the directory trees are the same and False otherwise. 
+ """ + + dirs_cmp = filecmp.dircmp(str(dir_1.resolve()), str(dir_2.resolve())) + if dirs_cmp.left_only or dirs_cmp.right_only or dirs_cmp.funny_files: + logger.log("Some files are missing or are funny.") + for file in dirs_cmp.left_only: + logger.log(f"Missing in existing docs: {file}") + for file in dirs_cmp.right_only: + logger.log(f"Missing in new docs: {file}") + for file in dirs_cmp.funny_files: + logger.log(f"The following file is funny: {file}") + return False + + _, mismatch, errors = filecmp.cmpfiles( + str(dir_1.resolve()), str(dir_2.resolve()), dirs_cmp.common_files, shallow=False + ) + + if mismatch or errors: + logger.log(f"Found mismatches: {mismatch}") + + # The root for where to save the diff will be different based on + # whether we are running in CI or not + os_root = pathlib.Path.cwd().anchor + diff_root = pathlib.Path(os_root, "builds", "worker") + if not ON_TRY: + diff_root = pathlib.Path(PerfDocLogger.TOP_DIR, "artifacts") + diff_root.mkdir(parents=True, exist_ok=True) + + diff_path = pathlib.Path(diff_root, "diff.txt") + with diff_path.open("w", encoding="utf-8") as diff_file: + for entry in mismatch: + logger.log(f"Mismatch found on {entry}") + + with pathlib.Path(dir_1, entry).open(encoding="utf-8") as f: + newlines = f.readlines() + with pathlib.Path(dir_2, entry).open(encoding="utf-8") as f: + baselines = f.readlines() + for line in difflib.unified_diff( + baselines, newlines, fromfile="base", tofile="new" + ): + logger.log(line) + + # Here we want to add to diff.txt in a patch format, we use + # the basedir to make the file names/paths relative and this is + # different in CI vs local runs. 
+ basedir = pathlib.Path( + os_root, "builds", "worker", "checkouts", "gecko" + ) + if not ON_TRY: + basedir = diff_root + + relative_path = str(pathlib.Path(dir_2, entry)).split(str(basedir))[-1] + patch = difflib.unified_diff( + baselines, newlines, fromfile=relative_path, tofile=relative_path + ) + + write_header = True + for line in patch: + if write_header: + diff_file.write( + f"diff --git a/{relative_path} b/{relative_path}\n" + ) + write_header = False + diff_file.write(line) + + logger.log(f"Completed diff on {entry}") + + logger.log(f"Saved diff to {diff_path}") + + return False + + for common_dir in dirs_cmp.common_dirs: + subdir_1 = pathlib.Path(dir_1, common_dir) + subdir_2 = pathlib.Path(dir_2, common_dir) + if not are_dirs_equal(subdir_1, subdir_2): + return False + + return True + + +def get_changed_files(top_dir): + """ + Returns the changed files found with duplicates removed. + """ + repo = get_repository_object(top_dir) + return list(set(repo.get_changed_files() + repo.get_outgoing_files())) diff --git a/tools/lint/perfdocs/verifier.py b/tools/lint/perfdocs/verifier.py new file mode 100644 index 0000000000..6603b9acce --- /dev/null +++ b/tools/lint/perfdocs/verifier.py @@ -0,0 +1,601 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +import os +import pathlib +import re + +import jsonschema + +from perfdocs.gatherer import Gatherer +from perfdocs.logger import PerfDocLogger +from perfdocs.utils import read_file, read_yaml + +logger = PerfDocLogger() + +""" +Schema for the config.yml file. +Expecting a YAML file with a format such as this: + +name: raptor +manifest: testing/raptor/raptor/raptor.toml +static-only: False +suites: + desktop: + description: "Desktop tests." + tests: + raptor-tp6: "Raptor TP6 tests." 
+ mobile: + description: "Mobile tests" + benchmarks: + description: "Benchmark tests." + tests: + wasm: "All wasm tests." + +""" +CONFIG_SCHEMA = { + "definitions": { + "metrics_schema": { + "metric_name": { + "type": "object", + "properties": { + "aliases": {"type": "array", "items": {"type": "string"}}, + "description": {"type": "string"}, + "matcher": {"type": "string"}, + }, + "required": ["description", "aliases"], + }, + }, + }, + "type": "object", + "properties": { + "name": {"type": "string"}, + "manifest": {"type": "string"}, + "static-only": {"type": "boolean"}, + "metrics": {"$ref": "#/definitions/metrics_schema"}, + "suites": { + "type": "object", + "properties": { + "suite_name": { + "type": "object", + "properties": { + "tests": { + "type": "object", + "properties": { + "test_name": {"type": "string"}, + "metrics": {"$ref": "#/definitions/metrics_schema"}, + }, + }, + "description": {"type": "string"}, + "owner": {"type": "string"}, + "metrics": {"$ref": "#/definitions/metrics_schema"}, + }, + "required": ["description"], + } + }, + }, + }, + "required": ["name", "manifest", "static-only", "suites"], +} + + +class Verifier(object): + """ + Verifier is used for validating the perfdocs folders/tree. In the future, + the generator will make use of this class to obtain a validated set of + descriptions that can be used to build up a document. + """ + + def __init__(self, workspace_dir, taskgraph=None): + """ + Initialize the Verifier. + + :param str workspace_dir: Path to the top-level checkout directory. 
+ """ + self.workspace_dir = workspace_dir + self._gatherer = Gatherer(workspace_dir, taskgraph) + self._compiled_matchers = {} + + def _is_yaml_test_match( + self, target_test_name, test_name, suite="", global_descriptions={} + ): + """Determine if a target name (from a YAML) matches with a test.""" + tb = os.path.basename(target_test_name) + tb = re.sub("\..*", "", tb) + if test_name == tb: + # Found an exact match for the test_name + return True + if test_name in tb: + # Found a 'fuzzy' match for the test_name + # i.e. 'wasm' could exist for all raptor wasm tests + global_descriptions.setdefault(suite, []).append(test_name) + return True + + def _validate_desc_yaml_direction( + self, suite, framework_info, yaml_content, global_descriptions + ): + """Validate the descriptions in the YAML. + + This validation ensures that all tests defined in the YAML exist in the test + harness. Failures here suggest that there's a typo in the YAML or that + a test was removed. + """ + ytests = yaml_content["suites"][suite] + global_descriptions[suite] = [] + if not ytests.get("tests"): + # It's possible a suite entry has no tests + return True + + # Suite found - now check if any tests in YAML + # definitions don't exist + ytests = ytests["tests"] + for test_name in ytests: + foundtest = False + for t in framework_info["test_list"][suite]: + if self._is_yaml_test_match( + t, test_name, suite=suite, global_descriptions=global_descriptions + ): + foundtest = True + break + if not foundtest: + logger.warning( + "Could not find an existing test for {} - bad test name?".format( + test_name + ), + framework_info["yml_path"], + ) + return False + + def _validate_desc_harness_direction( + self, suite, test_list, yaml_content, global_descriptions + ): + """Validate that the tests have a description in the YAML. + + This stage of validation ensures that all the tests have some + form of description, or that global descriptions are available. 
+ Failures here suggest a new test was added, or the config.yml + file was changed. + """ + # If only a description is provided for the suite, assume + # that this is a suite-wide description and don't check for + # it's tests + stests = yaml_content["suites"][suite].get("tests", None) + if not stests: + return + + tests_found = 0 + missing_tests = [] + test_to_manifest = {} + for test_name, test_info in test_list.items(): + manifest_path = test_info.get("path", test_info.get("manifest", "")) + tb = os.path.basename(manifest_path) + tb = re.sub("\..*", "", tb) + if ( + stests.get(tb, None) is not None + or stests.get(test_name, None) is not None + ): + # Test description exists, continue with the next test + tests_found += 1 + continue + test_to_manifest[test_name] = manifest_path + missing_tests.append(test_name) + + # Check if global test descriptions exist (i.e. + # ones that cover all of tp6) for the missing tests + new_mtests = [] + for mt in missing_tests: + found = False + for test_name in global_descriptions[suite]: + # Global test exists for this missing test + if mt.startswith(test_name): + found = True + break + if test_name in mt: + found = True + break + if not found: + new_mtests.append(mt) + + if len(new_mtests): + # Output an error for each manifest with a missing + # test description + for test_name in new_mtests: + logger.warning( + "Could not find a test description for {}".format(test_name), + test_to_manifest[test_name], + ) + + def _match_metrics(self, target_metric_name, target_metric_info, measured_metrics): + """Find all metrics that match the given information. + + It either checks for the metric through a direct equality check, and if + a regex matcher was provided, we will use that afterwards. 
+ """ + verified_metrics = [] + + metric_names = target_metric_info["aliases"] + [target_metric_name] + for measured_metric in measured_metrics: + if measured_metric in metric_names: + verified_metrics.append(measured_metric) + + if target_metric_info.get("matcher", ""): + # Compile the regex separately to capture issues in the regex + # compilation + matcher = self._compiled_matchers.get(target_metric_name, None) + if not matcher: + matcher = re.compile(target_metric_info.get("matcher")) + self._compiled_matchers[target_metric_name] = matcher + + # Search the measured metrics + for measured_metric in measured_metrics: + if matcher.search(measured_metric): + verified_metrics.append(measured_metric) + + return verified_metrics + + def _validate_metrics_yaml_direction( + self, suite, framework_info, yaml_content, global_metrics + ): + """Validate the metric descriptions in the YAML. + + This direction (`yaml_direction`) checks that the YAML definitions exist in + the test harness as real metrics. Failures here suggest that a metric + changed name, is missing an alias, is misnamed, duplicated, or was removed. + """ + yaml_suite = yaml_content["suites"][suite] + suite_metrics = yaml_suite.get("metrics", {}) + + # Check to make sure all the metrics with given descriptions + # are actually being measured. Add the metric to the "verified" field in + # global_metrics to use it later for "global" metrics that can + # have their descriptions removed. Start from the test level. + for test_name, test_info in yaml_suite.get("tests", {}).items(): + if not isinstance(test_info, dict): + continue + test_metrics_info = test_info.get("metrics", {}) + + # Find all tests that match with this name in case they measure + # different things + measured_metrics = [] + for t in framework_info["test_list"][suite]: + if not self._is_yaml_test_match(t, test_name): + # Check to make sure we are checking against the right + # test. Skip the metric check if we can't find the test. 
+ continue + measured_metrics.extend( + framework_info["test_list"][suite][t].get("metrics", []) + ) + + if len(measured_metrics) == 0: + continue + + # Check if all the test metrics documented exist + for metric_name, metric_info in test_metrics_info.items(): + verified_metrics = self._match_metrics( + metric_name, metric_info, measured_metrics + ) + if len(verified_metrics) > 0: + global_metrics["yaml-verified"].extend( + [metric_name] + metric_info["aliases"] + ) + global_metrics["verified"].extend( + [metric_name] + metric_info["aliases"] + verified_metrics + ) + else: + logger.warning( + ( + "Cannot find documented metric `{}` " + "being used in the specified test `{}`." + ).format(metric_name, test_name), + framework_info["yml_path"], + ) + + # Check the suite level now + for suite_metric_name, suite_metric_info in suite_metrics.items(): + measured_metrics = [] + for _, test_info in framework_info["test_list"][suite].items(): + measured_metrics.extend(test_info.get("metrics", [])) + + verified_metrics = self._match_metrics( + suite_metric_name, suite_metric_info, measured_metrics + ) + if len(verified_metrics) > 0: + global_metrics["yaml-verified"].extend( + [suite_metric_name] + suite_metric_info["aliases"] + ) + global_metrics["verified"].extend( + [suite_metric_name] + + suite_metric_info["aliases"] + + verified_metrics + ) + else: + logger.warning( + ( + "Cannot find documented metric `{}` " + "being used in the specified suite `{}`." 
+ ).format(suite_metric_name, suite), + framework_info["yml_path"], + ) + + # Finally check the global level (output failures later) + all_measured_metrics = [] + for _, test_info in framework_info["test_list"][suite].items(): + all_measured_metrics.extend(test_info.get("metrics", [])) + for global_metric_name, global_metric_info in global_metrics["global"].items(): + verified_metrics = self._match_metrics( + global_metric_name, global_metric_info, all_measured_metrics + ) + if global_metric_info.get("verified", False): + # We already verified this global metric, but add any + # extra verified metrics here + global_metrics["verified"].extend(verified_metrics) + continue + if len(verified_metrics) > 0: + global_metric_info["verified"] = True + global_metrics["yaml-verified"].extend( + [global_metric_name] + global_metric_info["aliases"] + ) + global_metrics["verified"].extend( + [global_metric_name] + + global_metric_info["aliases"] + + verified_metrics + ) + + def _validate_metrics_harness_direction( + self, suite, test_list, yaml_content, global_metrics + ): + """Validate that metrics in the harness are documented.""" + # Gather all the metrics being measured + all_measured_metrics = {} + for test_name, test_info in test_list.items(): + metrics = test_info.get("metrics", []) + for metric in metrics: + all_measured_metrics.setdefault(metric, []).append(test_name) + + if len(all_measured_metrics) == 0: + # There are no metrics measured by this suite + return + + for metric, tests in all_measured_metrics.items(): + if metric not in global_metrics["verified"]: + # Log a warning in all files that have this metric + for test in tests: + logger.warning( + "Missing description for the metric `{}` in test `{}`".format( + metric, test + ), + test_list[test].get( + "path", test_list[test].get("manifest", "") + ), + ) + + def validate_descriptions(self, framework_info): + """ + Cross-validate the tests found in the manifests and the YAML + test definitions. 
This function doesn't return a valid flag. Instead, + the StructDocLogger.VALIDATION_LOG is used to determine validity. + + The validation proceeds as follows: + 1. Check that all tests/suites in the YAML exist in the manifests. + - At the same time, build a list of global descriptions which + define descriptions for groupings of tests. + 2. Check that all tests/suites found in the manifests exist in the YAML. + - For missing tests, check if a global description for them exists. + + As the validation is completed, errors are output into the validation log + for any issues that are found. + + The same is done for the metrics field expect it also has regex matching, + and the definitions cannot be duplicated in a single harness. We make use + of two `*verified` fields to simplify the two stages/directions, and checking + for any duplication. + + :param dict framework_info: Contains information about the framework. See + `Gatherer.get_test_list` for information about its structure. + """ + yaml_content = framework_info["yml_content"] + + # Check for any bad test/suite names in the yaml config file + # TODO: Combine global settings into a single dictionary + global_descriptions = {} + global_metrics = { + "global": yaml_content.get("metrics", {}), + "verified": [], + "yaml-verified": [], + } + for suite, ytests in yaml_content["suites"].items(): + # Find the suite, then check against the tests within it + if framework_info["test_list"].get(suite, None) is None: + logger.warning( + "Could not find an existing suite for {} - bad suite name?".format( + suite + ), + framework_info["yml_path"], + ) + continue + + # Validate descriptions + self._validate_desc_yaml_direction( + suite, framework_info, yaml_content, global_descriptions + ) + + # Validate metrics + self._validate_metrics_yaml_direction( + suite, framework_info, yaml_content, global_metrics + ) + + # The suite and test levels were properly checked, but we can only + # check the global level after all suites were 
checked. If the metric + # isn't in the verified + for global_metric_name, _ in global_metrics["global"].items(): + if global_metric_name not in global_metrics["verified"]: + logger.warning( + ( + "Cannot find documented metric `{}` " + "being used in the specified harness `{}`." + ).format(global_metric_name, yaml_content["name"]), + framework_info["yml_path"], + ) + + # Check for duplicate metrics/aliases in the verified metrics + unique_metrics = set() + warned = set() + for metric in global_metrics["yaml-verified"]: + if ( + metric in unique_metrics or unique_metrics.add(metric) + ) and metric not in warned: + logger.warning( + "Duplicate definitions found for `{}`.".format(metric), + framework_info["yml_path"], + ) + warned.add(metric) + + # Check for duplicate metrics in the global level + unique_metrics = set() + warned = set() + for metric, metric_info in global_metrics["global"].items(): + if ( + metric in unique_metrics or unique_metrics.add(metric) + ) and metric not in warned: + logger.warning( + "Duplicate definitions found for `{}`.".format(metric), + framework_info["yml_path"], + ) + for alias in metric_info.get("aliases", []): + unique_metrics.add(alias) + warned.add(alias) + warned.add(metric) + + # Check for any missing tests/suites + for suite, test_list in framework_info["test_list"].items(): + if not yaml_content["suites"].get(suite): + # Description doesn't exist for the suite + logger.warning( + "Missing suite description for {}".format(suite), + yaml_content["manifest"], + ) + continue + + self._validate_desc_harness_direction( + suite, test_list, yaml_content, global_descriptions + ) + + self._validate_metrics_harness_direction( + suite, test_list, yaml_content, global_metrics + ) + + def validate_yaml(self, yaml_path): + """ + Validate that the YAML file has all the fields that are + required and parse the descriptions into strings in case + some are give as relative file paths. + + :param str yaml_path: Path to the YAML to validate. 
+ :return bool: True/False => Passed/Failed Validation + """ + + def _get_description(desc): + """ + Recompute the description in case it's a file. + """ + desc_path = pathlib.Path(self.workspace_dir, desc) + + try: + if desc_path.exists() and desc_path.is_file(): + with open(desc_path, "r") as f: + desc = f.readlines() + except OSError: + pass + + return desc + + def _parse_descriptions(content): + for suite, sinfo in content.items(): + desc = sinfo["description"] + sinfo["description"] = _get_description(desc) + + # It's possible that the suite has no tests and + # only a description. If they exist, then parse them. + if "tests" in sinfo: + for test, desc in sinfo["tests"].items(): + sinfo["tests"][test] = _get_description(desc) + + valid = False + yaml_content = read_yaml(yaml_path) + + try: + jsonschema.validate(instance=yaml_content, schema=CONFIG_SCHEMA) + _parse_descriptions(yaml_content["suites"]) + valid = True + except Exception as e: + logger.warning("YAML ValidationError: {}".format(str(e)), yaml_path) + + return valid + + def validate_rst_content(self, rst_path): + """ + Validate that the index file given has a {documentation} entry + so that the documentation can be inserted there. + + :param str rst_path: Path to the RST file. + :return bool: True/False => Passed/Failed Validation + """ + rst_content = read_file(rst_path) + + # Check for a {documentation} entry in some line, + # if we can't find one, then the validation fails. 
+ valid = False + docs_match = re.compile(".*{documentation}.*") + for line in rst_content: + if docs_match.search(line): + valid = True + break + if not valid: + logger.warning( # noqa: PLE1205 + "Cannot find a '{documentation}' entry in the given index file", + rst_path, + ) + + return valid + + def _check_framework_descriptions(self, item): + """ + Helper method for validating descriptions + """ + framework_info = self._gatherer.get_test_list(item) + self.validate_descriptions(framework_info) + + def validate_tree(self): + """ + Validate the `perfdocs` directory that was found. + Returns True if it is good, false otherwise. + + :return bool: True/False => Passed/Failed Validation + """ + found_good = 0 + + # For each framework, check their files and validate descriptions + for matched in self._gatherer.perfdocs_tree: + # Get the paths to the YAML and RST for this framework + matched_yml = pathlib.Path(matched["path"], matched["yml"]) + matched_rst = pathlib.Path(matched["path"], matched["rst"]) + + _valid_files = { + "yml": self.validate_yaml(matched_yml), + "rst": True, + } + if not read_yaml(matched_yml)["static-only"]: + _valid_files["rst"] = self.validate_rst_content(matched_rst) + + # Log independently the errors found for the matched files + for file_format, valid in _valid_files.items(): + if not valid: + logger.log("File validation error: {}".format(file_format)) + if not all(_valid_files.values()): + continue + found_good += 1 + + self._check_framework_descriptions(matched) + + if not found_good: + raise Exception("No valid perfdocs directories found") |