Diffstat (limited to 'toolkit/components/glean/build_scripts/perf_data_review.py')
-rw-r--r-- | toolkit/components/glean/build_scripts/perf_data_review.py | 168
1 file changed, 168 insertions, 0 deletions
diff --git a/toolkit/components/glean/build_scripts/perf_data_review.py b/toolkit/components/glean/build_scripts/perf_data_review.py
new file mode 100644
index 0000000000..8c84249a2a
--- /dev/null
+++ b/toolkit/components/glean/build_scripts/perf_data_review.py
@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Produce skeleton Performance Data Review Requests.
+
+This was mostly copied from glean_parser, and should be kept in sync.
+"""
+
+import re
+from pathlib import Path
+from typing import Sequence
+
+from glean_parser import parser, util
+
+
+def generate(
+    bug: str,
+    metrics_files: Sequence[Path],
+) -> int:
+    """
+    Commandline helper for Data Review Request template generation.
+
+    :param bug: pattern to match in metrics' bug_numbers lists.
+    :param metrics_files: List of Path objects to load metrics from.
+    :return: Non-zero if there were any errors.
+    """
+
+    metrics_files = util.ensure_list(metrics_files)
+
+    # Accept any value of expires.
+    parser_options = {
+        "allow_reserved": True,
+        "custom_is_expired": lambda expires: False,
+        "custom_validate_expires": lambda expires: True,
+    }
+    all_objects = parser.parse_objects(metrics_files, parser_options)
+
+    if util.report_validation_errors(all_objects):
+        return 1
+
+    # I tried [\W\Z] but it complained. So `|` it is.
+    reobj = re.compile(f"\\W{bug}\\W|\\W{bug}$")
+    durations = set()
+    responsible_emails = set()
+    metrics_table = ""
+    for category_name, metrics in all_objects.value.items():
+        for metric in metrics.values():
+            if not any([len(reobj.findall(bug)) == 1 for bug in metric.bugs]):
+                continue
+
+            metric_name = util.snake_case(metric.name)
+            category_name = util.snake_case(category_name)
+            one_line_desc = metric.description.replace("\n", " ")
+            sensitivity = ", ".join([s.name for s in metric.data_sensitivity])
+            last_bug = metric.bugs[-1]
+            metrics_table += f"`{category_name}.{metric_name}` | "
+            metrics_table += f"{one_line_desc} | {sensitivity} | {last_bug}\n"
+            if metric.type == "event" and len(metric.allowed_extra_keys):
+                for extra_name, extra_detail in metric.extra_keys.items():
+                    extra_one_line_desc = extra_detail["description"].replace("\n", " ")
+                    metrics_table += f"`{category_name}.{metric_name}#{extra_name}` | "
+                    metrics_table += (
+                        f"{extra_one_line_desc} | {sensitivity} | {last_bug}\n"
+                    )
+
+            durations.add(metric.expires)
+
+            if metric.expires == "never":
+                responsible_emails.update(metric.notification_emails)
+
+    if len(durations) == 1:
+        duration = next(iter(durations))
+        if duration == "never":
+            collection_duration = "This collection will be collected permanently."
+        else:
+            collection_duration = f"This collection has expiry '{duration}'"
+    else:
+        collection_duration = "Parts of this collection expire at different times: "
+        collection_duration += f"{durations}"
+
+    if "never" in durations:
+        collection_duration += "\n" + ", ".join(responsible_emails) + " "
+        collection_duration += "will be responsible for the permanent collections."
+
+    if len(durations) == 0:
+        print(f"I'm sorry, I couldn't find metrics matching the bug number {bug}.")
+        return 1
+
+    # This template is pulled from
+    # https://github.com/mozilla/data-review/blob/main/request.md
+    print(
+        """
+!! Reminder: it is your responsibility to complete and check the correctness of
+!! this automatically-generated request skeleton before requesting Data
+!! Collection Review. See https://wiki.mozilla.org/Data_Collection for details.
+
+DATA REVIEW REQUEST
+1. What questions will you answer with this data?
+
+TODO: Fill this in.
+
+2. Why does Mozilla need to answer these questions? Are there benefits for users?
+   Do we need this information to address product or business requirements?
+
+In order to guarantee the performance of our products, it is vital to monitor
+real-world installs used by real-world users.
+
+3. What alternative methods did you consider to answer these questions?
+   Why were they not sufficient?
+
+Our ability to measure the practical performance impact of changes through CI
+and manual testing is limited. Monitoring the performance of our products in
+the wild among real users is the only way to be sure we have an accurate
+picture.
+
+4. Can current instrumentation answer these questions?
+
+No.
+
+5. List all proposed measurements and indicate the category of data collection for each
+   measurement, using the Firefox data collection categories found on the Mozilla wiki.
+
+Measurement Name | Measurement Description | Data Collection Category | Tracking Bug
+---------------- | ----------------------- | ------------------------ | ------------"""
+    )
+    print(metrics_table)
+    print(
+        """
+6. Please provide a link to the documentation for this data collection which
+   describes the ultimate data set in a public, complete, and accurate way.
+
+This collection is Glean so is documented
+[in the Glean Dictionary](https://dictionary.telemetry.mozilla.org).
+
+7. How long will this data be collected?
+"""
+    )
+    print(collection_duration)
+    print(
+        """
+8. What populations will you measure?
+
+All channels, countries, and locales. No filters.
+
+9. If this data collection is default on, what is the opt-out mechanism for users?
+
+These collections are Glean. The opt-out can be found in the product's preferences.
+
+10. Please provide a general description of how you will analyze this data.
+
+This will be continuously monitored for regression and improvement detection.
+
+11. Where do you intend to share the results of your analysis?
+
+Internal monitoring (GLAM, Redash, Looker, etc.).
+
+12. Is there a third-party tool (i.e. not Telemetry) that you
+    are proposing to use for this data collection?
+
+No.
+"""
+    )
+
+    return 0
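
For context outside the diff: a minimal usage sketch of the new helper, assuming the build_scripts directory is on sys.path and glean_parser is installed; the bug number and metrics.yaml path below are hypothetical.

    from pathlib import Path

    from perf_data_review import generate

    # Prints a skeleton review request covering every metric whose
    # bug_numbers list mentions bug 1234567 (hypothetical bug and path);
    # returns non-zero if parsing fails or no metric matches the pattern.
    exit_code = generate("1234567", [Path("browser/metrics.yaml")])

Note that generate() prints the filled template to stdout rather than returning it, so a caller would redirect stdout to a file or paste the output into the Data Collection Review request.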