From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- .../glean_parser-7.2.1.dist-info/AUTHORS.md | 17 + .../glean_parser-7.2.1.dist-info/LICENSE | 373 +++++++++++ .../glean_parser-7.2.1.dist-info/METADATA | 726 ++++++++++++++++++++ .../glean_parser-7.2.1.dist-info/RECORD | 40 ++ .../glean_parser-7.2.1.dist-info/WHEEL | 5 + .../glean_parser-7.2.1.dist-info/entry_points.txt | 3 + .../glean_parser-7.2.1.dist-info/top_level.txt | 1 + .../python/glean_parser/glean_parser/__init__.py | 18 + .../python/glean_parser/glean_parser/__main__.py | 349 ++++++++++ .../python/glean_parser/glean_parser/coverage.py | 140 ++++ .../glean_parser/glean_parser/data_review.py | 79 +++ .../python/glean_parser/glean_parser/javascript.py | 322 +++++++++ .../python/glean_parser/glean_parser/kotlin.py | 356 ++++++++++ .../python/glean_parser/glean_parser/lint.py | 538 +++++++++++++++ .../python/glean_parser/glean_parser/markdown.py | 273 ++++++++ .../python/glean_parser/glean_parser/metrics.py | 435 ++++++++++++ .../python/glean_parser/glean_parser/parser.py | 446 +++++++++++++ .../python/glean_parser/glean_parser/pings.py | 97 +++ .../python/glean_parser/glean_parser/rust.py | 218 ++++++ .../glean_parser/schemas/metrics.1-0-0.schema.yaml | 605 +++++++++++++++++ .../glean_parser/schemas/metrics.2-0-0.schema.yaml | 735 +++++++++++++++++++++ .../glean_parser/schemas/pings.1-0-0.schema.yaml | 157 +++++ .../glean_parser/schemas/pings.2-0-0.schema.yaml | 169 +++++ .../glean_parser/schemas/tags.1-0-0.schema.yaml | 51 ++ .../python/glean_parser/glean_parser/swift.py | 260 ++++++++ .../python/glean_parser/glean_parser/tags.py | 49 ++ .../glean_parser/templates/data_review.jinja2 | 82 +++ .../templates/javascript.buildinfo.jinja2 | 11 + .../glean_parser/templates/javascript.jinja2 | 73 ++ .../glean_parser/templates/kotlin.buildinfo.jinja2 | 31 + .../glean_parser/templates/kotlin.geckoview.jinja2 | 124 ++++ .../glean_parser/templates/kotlin.jinja2 | 133 ++++ .../glean_parser/templates/markdown.jinja2 | 98 +++ .../glean_parser/templates/qmldir.jinja2 | 4 + .../glean_parser/templates/rust.jinja2 | 276 ++++++++ .../glean_parser/templates/swift.jinja2 | 138 ++++ .../python/glean_parser/glean_parser/translate.py | 227 +++++++ .../glean_parser/translation_options.py | 54 ++ .../python/glean_parser/glean_parser/util.py | 560 ++++++++++++++++ .../glean_parser/glean_parser/validate_ping.py | 74 +++ 40 files changed, 8347 insertions(+) create mode 100644 third_party/python/glean_parser/glean_parser-7.2.1.dist-info/AUTHORS.md create mode 100644 third_party/python/glean_parser/glean_parser-7.2.1.dist-info/LICENSE create mode 100644 third_party/python/glean_parser/glean_parser-7.2.1.dist-info/METADATA create mode 100644 third_party/python/glean_parser/glean_parser-7.2.1.dist-info/RECORD create mode 100644 third_party/python/glean_parser/glean_parser-7.2.1.dist-info/WHEEL create mode 100644 third_party/python/glean_parser/glean_parser-7.2.1.dist-info/entry_points.txt create mode 100644 third_party/python/glean_parser/glean_parser-7.2.1.dist-info/top_level.txt create mode 100644 third_party/python/glean_parser/glean_parser/__init__.py create mode 100644 third_party/python/glean_parser/glean_parser/__main__.py create mode 100644 third_party/python/glean_parser/glean_parser/coverage.py create mode 100644 third_party/python/glean_parser/glean_parser/data_review.py create mode 100644 
third_party/python/glean_parser/glean_parser/javascript.py create mode 100644 third_party/python/glean_parser/glean_parser/kotlin.py create mode 100644 third_party/python/glean_parser/glean_parser/lint.py create mode 100644 third_party/python/glean_parser/glean_parser/markdown.py create mode 100644 third_party/python/glean_parser/glean_parser/metrics.py create mode 100644 third_party/python/glean_parser/glean_parser/parser.py create mode 100644 third_party/python/glean_parser/glean_parser/pings.py create mode 100644 third_party/python/glean_parser/glean_parser/rust.py create mode 100644 third_party/python/glean_parser/glean_parser/schemas/metrics.1-0-0.schema.yaml create mode 100644 third_party/python/glean_parser/glean_parser/schemas/metrics.2-0-0.schema.yaml create mode 100644 third_party/python/glean_parser/glean_parser/schemas/pings.1-0-0.schema.yaml create mode 100644 third_party/python/glean_parser/glean_parser/schemas/pings.2-0-0.schema.yaml create mode 100644 third_party/python/glean_parser/glean_parser/schemas/tags.1-0-0.schema.yaml create mode 100644 third_party/python/glean_parser/glean_parser/swift.py create mode 100644 third_party/python/glean_parser/glean_parser/tags.py create mode 100644 third_party/python/glean_parser/glean_parser/templates/data_review.jinja2 create mode 100644 third_party/python/glean_parser/glean_parser/templates/javascript.buildinfo.jinja2 create mode 100644 third_party/python/glean_parser/glean_parser/templates/javascript.jinja2 create mode 100644 third_party/python/glean_parser/glean_parser/templates/kotlin.buildinfo.jinja2 create mode 100644 third_party/python/glean_parser/glean_parser/templates/kotlin.geckoview.jinja2 create mode 100644 third_party/python/glean_parser/glean_parser/templates/kotlin.jinja2 create mode 100644 third_party/python/glean_parser/glean_parser/templates/markdown.jinja2 create mode 100644 third_party/python/glean_parser/glean_parser/templates/qmldir.jinja2 create mode 100644 third_party/python/glean_parser/glean_parser/templates/rust.jinja2 create mode 100644 third_party/python/glean_parser/glean_parser/templates/swift.jinja2 create mode 100644 third_party/python/glean_parser/glean_parser/translate.py create mode 100644 third_party/python/glean_parser/glean_parser/translation_options.py create mode 100644 third_party/python/glean_parser/glean_parser/util.py create mode 100644 third_party/python/glean_parser/glean_parser/validate_ping.py (limited to 'third_party/python/glean_parser') diff --git a/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/AUTHORS.md b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/AUTHORS.md new file mode 100644 index 0000000000..525116ee7e --- /dev/null +++ b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/AUTHORS.md @@ -0,0 +1,17 @@ +# Credits + +## Development Lead + +- Jan-Erik Rediger +- Alessio Placitelli + +## Contributors + +See [the full list of contributors](https://github.com/mozilla/glean_parser/graphs/contributors). + +## Acknowledgements + +This package was created with +[Cookiecutter](https://github.com/audreyr/cookiecutter) and the +[audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage) +project template. 
diff --git a/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/LICENSE b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/LICENSE new file mode 100644 index 0000000000..a612ad9813 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. 
However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. 
* +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. 
diff --git a/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/METADATA b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/METADATA new file mode 100644 index 0000000000..4f3b85647f --- /dev/null +++ b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/METADATA @@ -0,0 +1,726 @@ +Metadata-Version: 2.1 +Name: glean-parser +Version: 7.2.1 +Summary: Parser tools for Mozilla's Glean telemetry +Home-page: https://github.com/mozilla/glean_parser +Author: The Glean Team +Author-email: glean-team@mozilla.com +License: UNKNOWN +Keywords: glean_parser +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Natural Language :: English +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Description-Content-Type: text/markdown +Requires-Dist: appdirs (>=1.4) +Requires-Dist: Click (>=7) +Requires-Dist: diskcache (>=4) +Requires-Dist: Jinja2 (>=2.10.1) +Requires-Dist: MarkupSafe (<=2.0.1,>=1.1.1) +Requires-Dist: jsonschema (>=3.0.2) +Requires-Dist: PyYAML (>=5.3.1) +Requires-Dist: iso8601 (>=0.1.10) ; python_version <= "3.6" + +# Glean Parser + +Parser tools for Mozilla's Glean telemetry. + +## Features + +Contains various utilities for handling `metrics.yaml` and `pings.yaml` for [the +Glean SDKs](https://mozilla.github.io/glean). This includes producing generated +code for various integrations, linting and coverage testing. + +## Documentation + +- [How to Contribute](https://github.com/mozilla/glean_parser/blob/main/CONTRIBUTING.md). Please file bugs in [bugzilla](https://bugzilla.mozilla.org/enter_bug.cgi?assigned_to=nobody%40mozilla.org&bug_ignored=0&bug_severity=normal&bug_status=NEW&cf_fission_milestone=---&cf_fx_iteration=---&cf_fx_points=---&cf_status_firefox65=---&cf_status_firefox66=---&cf_status_firefox67=---&cf_status_firefox_esr60=---&cf_status_thunderbird_esr60=---&cf_tracking_firefox65=---&cf_tracking_firefox66=---&cf_tracking_firefox67=---&cf_tracking_firefox_esr60=---&cf_tracking_firefox_relnote=---&cf_tracking_thunderbird_esr60=---&product=Data%20Platform%20and%20Tools&component=Glean%3A%20SDK&contenttypemethod=list&contenttypeselection=text%2Fplain&defined_groups=1&flag_type-203=X&flag_type-37=X&flag_type-41=X&flag_type-607=X&flag_type-721=X&flag_type-737=X&flag_type-787=X&flag_type-799=X&flag_type-800=X&flag_type-803=X&flag_type-835=X&flag_type-846=X&flag_type-855=X&flag_type-864=X&flag_type-916=X&flag_type-929=X&flag_type-930=X&flag_type-935=X&flag_type-936=X&flag_type-937=X&form_name=enter_bug&maketemplate=Remember%20values%20as%20bookmarkable%20template&op_sys=Unspecified&priority=P3&&rep_platform=Unspecified&status_whiteboard=%5Btelemetry%3Aglean-rs%3Am%3F%5D&target_milestone=---&version=unspecified). +- [User documentation for Glean](https://mozilla.github.io/glean/). +- [`glean_parser` developer documentation](https://mozilla.github.io/glean_parser/). + +## Requirements + +- Python 3.6 (or later) + +The following library requirements are installed automatically when +`glean_parser` is installed by `pip`. 
+ +- appdirs +- Click +- diskcache +- Jinja2 +- jsonschema +- PyYAML + +Additionally on Python 3.6: + +- iso8601 + +## Usage + +```sh +$ glean_parser --help +``` + +Read in `metrics.yaml`, translate to Kotlin format, and +output to `output_dir`: + +```sh +$ glean_parser translate -o output_dir -f kotlin metrics.yaml +``` + +Check a Glean ping against the ping schema: + +```sh +$ glean_parser check < ping.json +``` + + +# Changelog + +## Unreleased + +## 7.2.1 + +- Unbreak last minor release ([#579](https://github.com/mozilla/glean_parser/pull/579)) + +## 7.2.0 + +- Remove yamllint integration ([#578](https://github.com/mozilla/glean_parser/pull/578)) + +## 7.1.0 + +- ENHANCEMENT: Labels in `labels:` fields may now contain any printable ASCII characters ([bug 1672273](https://bugzilla.mozilla.org/show_bug.cgi?id=1672273)) +- BUGFIX: Enforce ordering of generation of Pings, Metrics and Tags such that order is deterministic ([bug 1820334](https://bugzilla.mozilla.org/show_bug.cgi?id=1820334)) + +## 7.0.0 + +- BUGFIX: Remove internal-only fields from serialized metrics data ([#550](https://github.com/mozilla/glean_parser/pull/550)) +- FEATURE: New subcommand: `dump` to dump the metrics data as JSON ([#550](https://github.com/mozilla/glean_parser/pull/550)) +- BUGFIX: Kotlin: Generate enums with the right generic bound for ping reason codes ([#551](https://github.com/mozilla/glean_parser/pull/551)). +- **BREAKING CHANGE:** Fully remove support for the old events API ([#549](https://github.com/mozilla/glean_parser/pull/549)) + Adds a new lint `OLD_EVENT_API` to warn about missing `type` attributes on event extra keys. + Note that the Glean SDK already dropped support for the old events API. + +## 6.4.0 + +- BUGFIX: Correct code generation for labeled metrics in Rust ([#533](https://github.com/mozilla/glean_parser/pull/533)) +- BUGFIX: Correctly serialize `Rates` for Rust code ([#530](https://github.com/mozilla/glean_parser/pull/530)) +- Feature: Wrap labeled metric's static labels list as CoW strings (requires updated Glean support) ([#534](https://github.com/mozilla/glean_parser/pull/534)) + +## 6.3.0 + +- events: Increase extras limit to 15 ([bug 1798713](https://bugzilla.mozilla.org/show_bug.cgi?id=1798713)) + +## 6.2.1 + +- Add support for Rate, Denominator and Numerator metrics for JavaScript. ([bug 1793777](https://bugzilla.mozilla.org/show_bug.cgi?id=1793777)) + +## 6.2.0 + +- [data-review] Use a template to generate the Data Review Request template ([bug 1772605](https://bugzilla.mozilla.org/show_bug.cgi?id=1772605)) +- Make tag and no\_lint order deterministic ([#518](https://github.com/mozilla/glean_parser/pull/518)) + +## 6.1.2 + +- Swift: Add a conditional `import Foundation` to support generating metrics when Glean is delivered via the AppServices iOS megazord + +## 6.1.1 + +- Rust: Use correct name for a ping in generated code. + +## 6.1.0 + +- [data-review] Include extra keys' names and descriptions in data review template ([bug 1767027](https://bugzilla.mozilla.org/show_bug.cgi?id=1767027)) +- Raise limit on number of statically-defined labels to 4096. ([bug 1772163](https://bugzilla.mozilla.org/show_bug.cgi?id=1772163)) +- Fix Rust code generation for new UniFFI interface ([#491](https://github.com/mozilla/glean_parser/pull/491), [#494](https://github.com/mozilla/glean_parser/pull/494), [#495](https://github.com/mozilla/glean_parser/pull/495)) + +## 6.0.1 + +- Relax version requirement for MarkupSafe. + Now works with MarkupSafe v1.1.1 to v2.0.1 inclusive again. 
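The Usage section above shows only the command line; the `translate` subcommand added in `__main__.py` later in this patch is a thin wrapper around `glean_parser.translate.translate`. Below is a minimal sketch of driving the same translation from Python. The `metrics.yaml` and `output_dir` paths are placeholders, and the parser-config keys simply mirror what the CLI wrapper passes; treat it as an illustration, not part of the vendored package.

```python
from pathlib import Path

from glean_parser import translate as mod_translate

# Mirrors `glean_parser translate -o output_dir -f kotlin metrics.yaml`.
# The input and output paths here are hypothetical.
result = mod_translate.translate(
    [Path("metrics.yaml")],   # registry files to parse
    "kotlin",                 # a key of mod_translate.OUTPUTTERS
    Path("output_dir"),       # directory for the generated code
    {},                       # backend-specific options (the CLI's -s key=value pairs)
    {                         # parser configuration, as built by the CLI wrapper
        "allow_reserved": False,
        "allow_missing_files": False,
        "require_tags": False,
        "expire_by_version": None,
    },
)

# The CLI passes this value straight to sys.exit, so a non-zero result
# indicates that parsing or translation failed.
print("translate exit code:", result)
```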
+ +## 6.0.0 + +- BUGFIX: Add missing `extra_args` to Rust constructor generation ([bug 1765855](https://bugzilla.mozilla.org/show_bug.cgi?id=1765855)) +- **Breaking change:** `glean_parser` now generates metrics compatible with the UniFFI-powered Glean SDK. + This is not backwards-compatible with previous versions. +- Generate Rate, Denominator and Numerator metrics for Kotlin and Swift +- Explicitly skip Rate, Denominator and Numerator metrics for JavaScript. + These will cause a build failure by default, but can be turned into warnings on request. + Use `-s fail_rates=false` to enable warning-only mode. + +## 5.1.2 + +- BUGFIX: Revert changes made on v5.1.1. + - The issues addressed by those changes, were non-issues and result of misuse of the APIs. + +## 5.1.1 + +- BUGFIX: Fix issues with Swift templates ([bug 1749494](https://bugzilla.mozilla.org/show_bug.cgi?id=1749494)) + - Make metrics and pings all `public` + - Make pings `static` + +## 5.1.0 + +- Add support for build info generation for JavaScript and Typescript targets ([bug 1749494](https://bugzilla.mozilla.org/show_bug.cgi?id=1749494)) + +## 5.0.1 + +- Fix the logic for the metric expiration by version ([bug 1753194](https://bugzilla.mozilla.org/show_bug.cgi?id=1753194)) + +## 5.0.0 + +- Remove C# support ([#436](https://github.com/mozilla/glean_parser/pull/436)). +- Add support for Rust code generation ([bug 1677434](https://bugzilla.mozilla.org/show_bug.cgi?id=1677434)) +- Report an error if no files are passed ([bug 1751730](https://bugzilla.mozilla.org/show_bug.cgi?id=1751730)) +- [data-review] Report an error if no metrics match provided bug number ([bug 1752576](https://bugzilla.mozilla.org/show_bug.cgi?id=1752576)) +- [data-review] Include notification_emails in list of those responsible ([bug 1752576](https://bugzilla.mozilla.org/show_bug.cgi?id=1752576)) +- Add support for expiring metrics by the provided major version ([bug 1753194](https://bugzilla.mozilla.org/show_bug.cgi?id=1753194)) + +## 4.4.0 + +- Support global file-level tags in metrics.yaml ([bug 1745283](https://bugzilla.mozilla.org/show_bug.cgi?id=1745283)) +- Glinter: Reject metric files if they use `unit` by mistake. It should be `time_unit` ([#432](https://github.com/mozilla/glean_parser/pull/432)). +- Automatically generate a build date when generating build info ([#431](https://github.com/mozilla/glean_parser/pull/431)). + Enabled for Kotlin and Swift. + This can be changed with the `build_date` command line option. + `build_date=0` will use a static unix epoch time. + `build_date=2022-01-03T17:30:00` will parse the ISO8601 string to use (as a UTC timestamp). + Other values will throw an error. + + Example: + + glean_parser translate --format kotlin --option build_date=2021-11-01T01:00:00 path/to/metrics.yaml + +## 4.3.1 + +- BUGFIX: Skip tags for code generation ([#409](https://github.com/mozilla/glean_parser/pull/409)) + +## 4.3.0 + +- Support tags in glean parser ([bug 1734011](https://bugzilla.mozilla.org/show_bug.cgi?id=1734011)) + +## 4.2.0 + +- Improve the schema validation error messages. They will no longer include `OrderedDict(...)` on Python 3.7 and later ([bug 1733395](https://bugzilla.mozilla.org/show_bug.cgi?id=1733395)) +- Officially support Python 3.10 + +## 4.1.1 (2021-09-28) + +- Update private import paths on Javascript / Typescript templates. ([bug 1702468](https://bugzilla.mozilla.org/show_bug.cgi?id=1702468)) + +## 4.1.0 (2021-09-16) + +- Add support for Node.js platform on Javascript / Typescript templates. 
([bug 1728982](https://bugzilla.mozilla.org/show_bug.cgi?id=1728982)) + +## 4.0.0 (2021-08-20) + +- Add support for Text metric type ([#374](https://github.com/mozilla/glean_parser/pull/374)) +- Reserve the `default` ping name. It can't be used as a ping name, but it can be used in `send_in_pings` ([#376](https://github.com/mozilla/glean_parser/pull/376)) + +## 3.8.0 (2021-08-18) + +- Expose ping reasons enum on JavaScript / TypeScript templates. ([bug 1719136](https://bugzilla.mozilla.org/show_bug.cgi?id=1719136)) +- Define an interface with the allowed extras for each event on the TypeScript template. ([bug 1693487](https://bugzilla.mozilla.org/show_bug.cgi?id=1693487)) + +## 3.7.0 (2021-07-13) + +- New lint: Check for redundant words in ping names ([#355](https://github.com/mozilla/glean_parser/pull/355)) +- Add support for URL metric type ([#361](https://github.com/mozilla/glean_parser/pull/361)) + +## 3.6.0 (2021-06-11) + +- Add a command `data-review` to generate a skeleton Data Review Request for all metrics matching a supplied bug number. ([bug 1704541](https://bugzilla.mozilla.org/show_bug.cgi?id=1704541)) +- Enable custom distribution outside of GeckoView (`gecko_datapoint` becomes optional) + +## 3.5.0 (2021-06-03) + +- Transform generated folder into QML Module when building Javascript templates for the Qt platform. ([bug 1707896](https://bugzilla.mozilla.org/show_bug.cgi?id=1707896)) + - Import the Glean QML module from inside each generated file, removing the requirement to import Glean before importing any of the generated files; + - Provide a `qmldir` file exposing all generated files; + - Drop the `namespace` option for Javascript templates; + - Add a new `version` option for Javascript templates, required when building for Qt, which expects the Glean QML module version. + +## 3.4.0 (2021-05-28) + +- Add missing import for Kotlin code ([#339](https://github.com/mozilla/glean_parser/pull/339)) +- Use a plain Kotlin type in the generated interface implementation ([#339](https://github.com/mozilla/glean_parser/pull/339)) +- Generate additional generics for event metrics ([#339](https://github.com/mozilla/glean_parser/pull/339)) +- For Kotlin skip generating `GleanBuildInfo.kt` when requested (with `with_buildinfo=false`) ([#341](https://github.com/mozilla/glean_parser/pull/341)) + +## 3.3.2 (2021-05-18) + +- Fix another bug in the Swift code generation when generating extra keys ([#334](https://github.com/mozilla/glean_parser/pull/334)) + +## 3.3.1 (2021-05-18) + +- Fix Swift code generation bug for pings ([#333](https://github.com/mozilla/glean_parser/pull/333)) + +## 3.3.0 (2021-05-18) + +- Generate new event API construct ([#321](https://github.com/mozilla/glean_parser/pull/321)) + +## 3.2.0 (2021-04-28) + +- Add option to add extra introductory text to generated markdown ([#298](https://github.com/mozilla/glean_parser/pull/298)) +- Add support for Qt in Javascript templates ([bug 1706252](https://bugzilla.mozilla.org/show_bug.cgi?id=1706252)) + - Javascript templates will now accept the `platform` option. If this option is set to `qt` + the generated templates will be Qt compatible. Default value is `webext`. + +## 3.1.2 (2021-04-21) + +- BUGFIX: Remove the "DO NOT COMMIT" notice from the documentation. + +## 3.1.1 (2021-04-19) + +- Recommend to not commit as well as to not edit the generated files. ([bug 1706042](https://bugzilla.mozilla.org/show_bug.cgi?id=1706042)) +- BUGFIX: Include import statement for labeled metric subtypes in Javascript and Typescript templates. 
+ +## 3.1.0 (2021-04-16) + +- Add support for labeled metric types in Javascript and Typescript templates. + +## 3.0.0 (2021-04-13) + +- Raise limit on number of statically-defined labels to 100. ([bug 1702263](https://bugzilla.mozilla.org/show_bug.cgi?id=1702263)) +- BUGFIX: Version 2.0.0 of the schema now allows the "special" `glean_.*` ping names for Glean-internal use again. +- Remove support for JWE metric types. + +## 2.5.0 (2021-02-23) + +- Add parser and object model support for `rate` metric type. ([bug 1645166](https://bugzilla.mozilla.org/show_bug.cgi?id=1645166)) +- Add parser and object model support for telemetry_mirror property. ([bug 1685406](https://bugzilla.mozilla.org/show_bug.cgi?id=1685406)) +- Update the Javascript template to match Glean.js expectations. ([bug 1693516](https://bugzilla.mozilla.org/show_bug.cgi?id=1693516)) + - Glean.js has updated its export strategy. It will now export each metric type as an independent module; + - Glean.js has dropped support for non ES6 modules. +- Add support for generating Typescript code. ([bug 1692157](https://bugzilla.mozilla.org/show_bug.cgi?id=1692157)) + - The templates added generate metrics and pings code for Glean.js. + +## 2.4.0 (2021-02-18) + +- **Experimental:** `glean_parser` has a new subcommand `coverage` to convert raw coverage reports + into something consumable by coverage tools, such as codecov.io +- The path to the file that each metric is defined in is now stored on the + `Metric` object in `defined_in["filepath"]`. + +## 2.3.0 (2021-02-17) + +- Leverage the `glean_namespace` to provide correct import when building for Javascript. + +## 2.2.0 (2021-02-11) + +- The Kotlin generator now generates static build information that can be passed + into `Glean.initialize` to avoid calling the package manager at runtime. + +## 2.1.0 (2021-02-10) + +- Add support for generating Javascript code. + - The templates added generate metrics and pings code for Glean.js. + +## 2.0.0 (2021-02-05) + +- New versions 2.0.0 of the `metrics.yaml` and `pings.yaml` schemas now ship + with `glean_parser`. These schemas are different from version 1.0.0 in the + following ways: + + - Bugs must be specified as URLs. Bug numbers are disallowed. + - The legacy ping names containing underscores are no longer allowed. These + included `deletion_request`, `bookmarks_sync`, `history_sync`, + `session_end`, `all_pings`, `glean_*`. In these cases, the `_` should be + replaced with `-`. + + To upgrade your app or library to use the new schema, replace the version in + the `$schema` value with `2-0-0`. + +- **Breaking change:** It is now an error to use bug numbers (rather than URLs) + in ping definitions. + +- Add the line number that metrics and pings were originally defined in the yaml + files. + +## 1.29.1 (2020-12-17) + +- BUGFIX: Linter output can now be redirected correctly (1675771). + +## 1.29.0 (2020-10-07) + +- **Breaking change:** `glean_parser` will now return an error code when any of + the input files do not exist (unless the `--allow-missing-files` flag is + passed). +- Generated code now includes a comment next to each metric containing the name + of the metric in its original `snake_case` form. +- When metrics don't provide a `unit` parameter, it is not included in the + output (as provided by probe-scraper). + +## 1.28.6 (2020-09-24) + +- BUGFIX: Ensure Kotlin arguments are deterministically ordered + +## 1.28.5 (2020-09-14) + +- Fix deploy step to update pip before deploying to pypi. 
+ +## 1.28.4 (2020-09-14) + +- The `SUPERFLUOUS_NO_LINT` warning has been removed from the glinter. + It likely did more harm than good, and makes it hard to make + `metrics.yaml` files that pass across different versions of + `glean_parser`. +- Expired metrics will now produce a linter warning, `EXPIRED_METRIC`. +- Expiry dates that are more than 730 days (\~2 years) in the future + will produce a linter warning, `EXPIRATION_DATE_TOO_FAR`. +- Allow using the Quantity metric type outside of Gecko. +- New parser configs `custom_is_expired` and `custom_validate_expires` + added. These are both functions that take the `expires` value of the + metric and return a bool. (See `Metric.is_expired` and + `Metric.validate_expires`). These will allow FOG to provide custom + validation for its version-based `expires` values. + +## 1.28.3 (2020-07-28) + +- BUGFIX: Support HashSet and Dictionary in the C\# generated code. + +## 1.28.2 (2020-07-28) + +- BUGFIX: Generate valid C\# code when using Labeled metric types. + +## 1.28.1 (2020-07-24) + +- BUGFIX: Add missing column to correctly render markdown tables in generated + documentation. + +## 1.28.0 (2020-07-23) + +- **Breaking change:** The internal ping `deletion-request` was misnamed in + pings.py causing the linter to not allow use of the correctly named ping for + adding legacy ids to. Consuming apps will need to update their metrics.yaml if + they are using `deletion_request` in any `send_in_pings` to `deletion-request` + after updating. + +## 1.27.0 (2020-07-21) + +- Rename the `data_category` field to `data_sensitivity` to be clearer. + +## 1.26.0 (2020-07-21) + +- Add support for JWE metric types. +- Add a `data_sensitivity` field to all metrics for specifying the type of data + collected in the field. + +## 1.25.0 (2020-07-17) + +- Add support for generating C\# code. +- BUGFIX: The memory unit is now correctly passed to the MemoryDistribution + metric type in Swift. + +## 1.24.0 (2020-06-30) + +- BUGFIX: look for metrics in send\_if\_empty pings. Metrics for these kinds of + pings were being ignored. + +## 1.23.0 (2020-06-27) + +- Support for Python 3.5 has been dropped. +- BUGFIX: The ordering of event extra keys will now match with their enum, + fixing a serious bug where keys of extras may not match the correct values in + the data payload. See . + +## 1.22.0 (2020-05-28) + +- **Breaking change:** (Swift only) Combine all metrics and pings into a single + generated file `Metrics.swift`. + +## 1.21.0 (2020-05-25) + +- `glinter` messages have been improved with more details and to be more + actionable. +- A maximum of 10 `extra_keys` is now enforced for `event` metric types. +- BUGFIX: the `Lifetime` enum values now match the values of the implementation + in mozilla/glean. + +## 1.20.4 (2020-05-07) + +- BUGFIX: yamllint errors are now reported using the correct file name. + +## 1.20.3 (2020-05-06) + +- Support for using `timing_distribution`'s `time_unit` parameter to control + the range of acceptable values is documented. The default unit for this use + case is `nanosecond` to avoid creating a breaking change. See [bug + 1630997](https://bugzilla.mozilla.org/show_bug.cgi?id=1630997) for more + information. + +## 1.20.2 (2020-04-24) + +- Dependencies that depend on the version of Python being used are now specified + using the [Declaring platform specific dependencies syntax in + setuptools](https://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies). 
+ This means that more recent versions of dependencies are likely to be + installed on Python 3.6 and later, and unnecessary backport libraries won't + be installed on more recent Python versions. + +## 1.20.1 (2020-04-21) + +- The minimum version of the runtime dependencies has been lowered to increase + compatibility with other tools. These minimum versions are now tested in CI, + in addition to testing the latest versions of the dependencies that was + already happening in CI. + +## 1.20.0 (2020-04-15) + +- **Breaking change:** glinter errors found during the `translate` command will + now return an error code. glinter warnings will be displayed, but not return + an error code. +- `glean_parser` now produces a linter warning when `user` lifetime metrics are + set to expire. See [bug + 1604854](https://bugzilla.mozilla.org/show_bug.cgi?id=1604854) for additional + context. + +## 1.19.0 (2020-03-18) + +- **Breaking change:** The regular expression used to validate labels is + stricter and more correct. +- Add more information about pings to markdown documentation: + - State whether the ping includes client id; + - Add list of data review links; + - Add list of related bugs links. +- `glean_parser` now makes it easier to write external translation + functions for different language targets. +- BUGFIX: `glean_parser` now works on 32-bit Windows. + +## 1.18.3 (2020-02-24) + +- Dropped the `inflection` dependency. +- Constrained the `zipp` and `MarkupSafe` transitive dependencies to versions + that support Python 3.5. + +## 1.18.2 (2020-02-14) + +- BUGFIX: Fix rendering of first element of reason list. + +## 1.18.1 (2020-02-14) + +- BUGFIX: Reason codes are displayed in markdown output for built-in + pings as well. +- BUGFIX: Reason descriptions are indented correctly in markdown + output. +- BUGFIX: To avoid a compiler error, the `@JvmName` annotation isn't + added to private members. + +## 1.18.0 (2020-02-13) + +- **Breaking Change (Java API)** Have the metrics names in Java match the names + in Kotlin. See [Bug + 1588060](https://bugzilla.mozilla.org/show_bug.cgi?id=1588060). +- The reasons a ping are sent are now included in the generated markdown + documentation. + +## 1.17.3 (2020-02-05) + +- BUGFIX: The version of Jinja2 now specifies < 3.0, since that version no + longer supports Python 3.5. + +## 1.17.2 (2020-02-05) + +- BUGFIX: Fixes an import error in generated Kotlin code. + +## 1.17.1 (2020-02-05) + +- BUGFIX: Generated Swift code now includes `import Glean`, unless generating + for a Glean-internal build. + +## 1.17.0 (2020-02-03) + +- Remove default schema URL from `validate_ping` +- Make `schema` argument required for CLI +- BUGFIX: Avoid default import in Swift code for Glean itself +- BUGFIX: Restore order of fields in generated Swift code + +## 1.16.0 (2020-01-15) + +- Support for `reason` codes on pings was added. + +## 1.15.6 (2020-02-06) + +- BUGFIX: The version of Jinja2 now specifies < 3.0, since that version no + longer supports Python 3.5 (backported from 1.17.3). + +## 1.15.5 (2019-12-19) + +- BUGFIX: Also allow the legacy name `all_pings` for `send_in_pings` parameter + on metrics + +## 1.15.4 (2019-12-19) + +- BUGFIX: Also allow the legacy name `all_pings` + +## 1.15.3 (2019-12-13) + +- Add project title to markdown template. +- Remove "Sorry about that" from markdown template. +- BUGFIX: Replace dashes in variable names to force proper naming + +## 1.15.2 (2019-12-12) + +- BUGFIX: Use a pure Python library for iso8601 so there is no compilation + required. 
+ +## 1.15.1 (2019-12-12) + +- BUGFIX: Add some additional ping names to the non-kebab-case allow list. + +## 1.15.0 (2019-12-12) + +- Restrict new pings names to be kebab-case and change `all_pings` to + `all-pings` + +## 1.14.0 (2019-12-06) + +- `glean_parser` now supports Python versions 3.5, 3.6, 3.7 and 3.8. + +## 1.13.0 (2019-12-04) + +- The `translate` command will no longer clear extra files in the output + directory. +- BUGFIX: Ensure all newlines in comments are prefixed with comment markers +- BUGFIX: Escape Swift keywords in variable names in generated code +- Generate documentation for pings that are sent if empty + +## 1.12.0 (2019-11-27) + +- Reserve the `deletion_request` ping name +- Added a new flag `send_if_empty` for pings + +## 1.11.0 (2019-11-13) + +- The `glinter` command now performs `yamllint` validation on registry files. + +## 1.10.0 (2019-11-11) + +- The Kotlin linter `detekt` is now run during CI, and for local + testing if installed. +- Python 3.8 is now tested in CI (in addition to Python 3.7). Using + `tox` for this doesn't work in modern versions of CircleCI, so the + `tox` configuration has been removed. +- `yamllint` has been added to test the YAML files on CI. +- ⚠ Metric types that don't yet have implementations in glean-core + have been removed. This includes `enumeration`, `rate`, `usage`, and + `use_counter`, as well as many labeled metrics that don't exist. + +## 1.9.5 (2019-10-22) + +- Allow a Swift lint for generated code +- New lint: Restrict what metric can go into the `baseline` ping +- New lint: Warn for slight misspellings in ping names +- BUGFIX: change Labeled types labels from lists to sets. + +## 1.9.4 (2019-10-16) + +- Use lists instead of sets in Labeled types labels to ensure that the order of + the labels passed to the `metrics.yaml` is kept. +- `glinter` will now check for duplicate labels and error if there are any. + +## 1.9.3 (2019-10-09) + +- Add labels from Labeled types to the Extra column in the Markdown template. + +## 1.9.2 (2019-10-08) + +- BUGFIX: Don't call `is_internal_metric` on `Ping` objects. + +## 1.9.1 (2019-10-07) + +- Don't include Glean internal metrics in the generated markdown. + +## 1.9.0 (2019-10-04) + +- Glinter now warns when bug numbers (rather than URLs) are used. +- BUGFIX: add `HistogramType` and `MemoryUnit` imports in Kotlin generated code. + +## 1.8.4 (2019-10-02) + +- Removed unsupported labeled metric types. + +## 1.8.3 (2019-10-02) + +- Fix indentation for generated Swift code + +## 1.8.2 (2019-10-01) + +- Created labeled metrics and events in Swift code and wrap it in a + configured namespace + +## 1.8.1 (2019-09-27) + +- BUGFIX: `memory_unit` is now passed to the Kotlin generator. + +## 1.8.0 (2019-09-26) + +- A new parser config, `do_not_disable_expired`, was added to turn off the + feature that expired metrics are automatically disabled. This is useful if you + want to retain the disabled value that is explicitly in the `metrics.yaml` + file. +- `glinter` will now report about superfluous `no_lint` entries. + +## 1.7.0 (2019-09-24) + +- A `glinter` tool is now included to find common mistakes in metric naming + and setup. This check is run during `translate` and warnings will be + displayed. ⚠ These warnings will be treated as errors in a future revision. + +## 1.6.1 (2019-09-17) + +- BUGFIX: `GleanGeckoMetricsMapping` must include `LabeledMetricType` + and `CounterMetricType`. + +## 1.6.0 (2019-09-17) + +- NEW: Support for outputting metrics in Swift. 
+- BUGFIX: Provides a helpful error message when `geckoview_datapoint` is used on + an metric type that doesn't support GeckoView exfiltration. +- Generate a lookup table for Gecko categorical histograms in + `GleanGeckoMetricsMapping`. +- Introduce a 'Swift' output generator. + +## 1.4.1 (2019-08-28) + +- Documentation only. + +## 1.4.0 (2019-08-27) + +- Added support for generating markdown documentation from `metrics.yaml` files. + +## 1.3.0 (2019-08-22) + +- `quantity` metric type has been added. + +## 1.2.1 (2019-08-13) + +- BUGFIX: `includeClientId` was not being output for PingType. + +## 1.2.0 (2019-08-13) + +- `memory_distribution` metric type has been added. +- `custom_distribution` metric type has been added. +- `labeled_timespan` is no longer an allowed metric type. + +## 1.1.0 (2019-08-05) + +- Add a special `all_pings` value to `send_in_pings`. + +## 1.0.0 (2019-07-29) + +- First release to start following strict semver. + +## 0.1.0 (2018-10-15) + +- First release on PyPI. + + diff --git a/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/RECORD b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/RECORD new file mode 100644 index 0000000000..f86c17c1d8 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/RECORD @@ -0,0 +1,40 @@ +glean_parser/__init__.py,sha256=bJljD052_0y-efcBhYpllICVCXOMHLcXRLNyrvfgt5A,533 +glean_parser/__main__.py,sha256=7kIBMO-kL7boJxYrKp3CkRr4xX4_ct4BqCiCvtg2jjU,8631 +glean_parser/coverage.py,sha256=2IwC4XMDtDamMkBFoYilmqJzW4gyypq65YVCur8SNas,4405 +glean_parser/data_review.py,sha256=BweeeTkNNS6HrIDkztawhbDByrk_-Avxpg7YeST3VAs,2152 +glean_parser/javascript.py,sha256=w4ZhNBHBKWYk0h3t7G0Ud2tR__hRqzn9dlEXNKLdQrA,11230 +glean_parser/kotlin.py,sha256=5z8_74xlqvHDsedwZhGf1_qb7swPEgIZumkJIuj3ef8,12598 +glean_parser/lint.py,sha256=A21ZKb9WSrgug6t8q1YHvXUxlB198xrkmZ26HCUDSlE,16303 +glean_parser/markdown.py,sha256=GkCr1CrV6mnRQseT6FO1-JJ7Eup8X3lxUfRMBTxXpe4,9066 +glean_parser/metrics.py,sha256=CSad9CbUWKF771Z82LhBAFAL9uXum0ycRwIHtl_i91E,12384 +glean_parser/parser.py,sha256=cUOnvSXKfEBg8YTpRcWiPcMwpFpK1TTqsVO_zjUtpR4,15309 +glean_parser/pings.py,sha256=yh_DzRAI9k2_NiCIlpQiNg-ggVrttB4hk7gwtKlr72s,2815 +glean_parser/rust.py,sha256=PJzTfYWzAumJYCP5IYPc6fhS_Qa30Q8NTK9plg3sDnk,6744 +glean_parser/swift.py,sha256=T1BSGahd9wUd6VDeNC89SdN6M34jKXDlydMpSI0QLOs,8379 +glean_parser/tags.py,sha256=bemKYvcbMO4JrghiNSe-A4BNNDtx_FlUPkgrPPJy84Y,1391 +glean_parser/translate.py,sha256=S_a4PMXt3PyD7Wg35OM4xHEwPraqkcJzm_w95IEegPU,7962 +glean_parser/translation_options.py,sha256=Lxzr6G7MP0tC_ZYlZXftS4j0SLiqO-5mGVTEc7ggXis,2037 +glean_parser/util.py,sha256=Hei33QDq4a_lIHp5j98KovN6C7tmLrvVamEX2a1DcTo,16825 +glean_parser/validate_ping.py,sha256=0TNvILH6dtzJDys3W8Kqorw6kk03me73OCUDtpoHcXU,2118 +glean_parser/schemas/metrics.1-0-0.schema.yaml,sha256=cND3cvi6iBfPUVmtfIBQfGJV9AALpbvN7nu8E33_J-o,19566 +glean_parser/schemas/metrics.2-0-0.schema.yaml,sha256=SOgqMzRs9QxyCBhjZwUhzlryeNLeaVAKMTwggG7XtQk,23843 +glean_parser/schemas/pings.1-0-0.schema.yaml,sha256=hwCnsKpEysmrmVp-QHGBArEkVY3vaU1rVsxlTwhAzws,4315 +glean_parser/schemas/pings.2-0-0.schema.yaml,sha256=rD1s-rfz1xC9biHyLfBCnsoQxVYHwpe_S05awfe2xDA,4363 +glean_parser/schemas/tags.1-0-0.schema.yaml,sha256=OGXIJlvvVW1vaqB_NVZnwKeZ-sLlfH57vjBSHbj6DNI,1231 +glean_parser/templates/data_review.jinja2,sha256=jeYU29T1zLSyu9fKBBFu5BFPfIw8_hmOUXw8RXhRXK8,3287 +glean_parser/templates/javascript.buildinfo.jinja2,sha256=4mXiZCQIk9if4lxlA05kpSIL4a95IdwGwqle2OqqNAs,474 
+glean_parser/templates/javascript.jinja2,sha256=cT_bG-jC6m4afECXmcsqHwiiHjRuVtJnfv90OD2Mwxw,2669 +glean_parser/templates/kotlin.buildinfo.jinja2,sha256=X0lk2SNu5OIIj2i6mUyF9CWFQIonLgfqkgT5fA-5G6c,920 +glean_parser/templates/kotlin.geckoview.jinja2,sha256=MJOgtoDXmBjE9pwk-G6T89y36RZuMbDWM_-DBN_gFJo,5099 +glean_parser/templates/kotlin.jinja2,sha256=3DqUMXJRkmTvSp_5IRyvGmw5iXYWdox7coMFe3YDxcc,5247 +glean_parser/templates/markdown.jinja2,sha256=vAHHGGm28HRDPd3zO_wQMAUZIuxE9uQ7hl3NpXxcKV4,3425 +glean_parser/templates/qmldir.jinja2,sha256=m6IGsp-tgTiOfQ7VN8XW6GqX0gJqJkt3B6Pkaul6FVo,156 +glean_parser/templates/rust.jinja2,sha256=tznLKaZxi_Z9puGqDKD0uuWefZcVHiNdQHB4BP9zJfs,10797 +glean_parser/templates/swift.jinja2,sha256=OsaEIlEdcOrUMvI_UzbxWv75lluTAWZGncH_pU-pbZQ,4809 +glean_parser-7.2.1.dist-info/AUTHORS.md,sha256=yxgj8MioO4wUnrh0gmfb8l3DJJrf-l4HmmEDbQsbbNI,455 +glean_parser-7.2.1.dist-info/LICENSE,sha256=HyVuytGSiAUQ6ErWBHTqt1iSGHhLmlC8fO7jTCuR8dU,16725 +glean_parser-7.2.1.dist-info/METADATA,sha256=6ZY8M4qK01Cz54nm4d9tOii3CBbW9lSaUSpHwUnm9JA,28275 +glean_parser-7.2.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92 +glean_parser-7.2.1.dist-info/entry_points.txt,sha256=s-clJTIqp-PpJD-n3AnIQZFkTafIrzsTbAPX9vNY018,69 +glean_parser-7.2.1.dist-info/top_level.txt,sha256=q7T3duD-9tYZFyDry6Wv2LcdMsK2jGnzdDFhxWcT2Z8,13 +glean_parser-7.2.1.dist-info/RECORD,, diff --git a/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/WHEEL b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/WHEEL new file mode 100644 index 0000000000..1f37c02f2e --- /dev/null +++ b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.40.0) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/entry_points.txt b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/entry_points.txt new file mode 100644 index 0000000000..2a22ca7321 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +glean_parser = glean_parser.__main__:main_wrapper + diff --git a/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/top_level.txt b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/top_level.txt new file mode 100644 index 0000000000..a7f3a37918 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser-7.2.1.dist-info/top_level.txt @@ -0,0 +1 @@ +glean_parser diff --git a/third_party/python/glean_parser/glean_parser/__init__.py b/third_party/python/glean_parser/glean_parser/__init__.py new file mode 100644 index 0000000000..ddca930c79 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +"""Top-level package for Glean parser.""" + +from pkg_resources import get_distribution, DistributionNotFound + +try: + __version__ = get_distribution(__name__).version +except DistributionNotFound: + # package is not installed + pass + +__author__ = """The Glean Team""" +__email__ = "glean-team@mozilla.com" diff --git a/third_party/python/glean_parser/glean_parser/__main__.py b/third_party/python/glean_parser/glean_parser/__main__.py new file mode 100644 index 0000000000..24876a1439 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/__main__.py @@ -0,0 +1,349 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Console script for glean_parser.""" + +import datetime +import io +from pathlib import Path +import sys + +import click +import json + + +import glean_parser + + +from . import coverage as mod_coverage +from . import data_review as mod_data_review +from . import lint +from . import translate as mod_translate +from . import validate_ping +from . import translation_options + + +CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) + + +@click.command(context_settings=CONTEXT_SETTINGS) +@click.argument( + "input", + type=click.Path(exists=False, dir_okay=False, file_okay=True, readable=True), + nargs=-1, +) +@click.option( + "--output", + "-o", + type=click.Path(dir_okay=True, file_okay=False, writable=True), + nargs=1, + required=True, +) +@click.option( + "--format", + "-f", + type=click.Choice(list(mod_translate.OUTPUTTERS.keys())), + required=True, +) +@click.option( + "--option", + "-s", + help="Backend-specific option. Must be of the form key=value.\ + Pass 'help' for valid options", + type=str, + multiple=True, + required=False, + is_eager=True, + callback=translation_options.translate_options, +) +@click.option( + "--allow-reserved", + is_flag=True, + help=( + "If provided, allow the use of reserved fields. " + "Should only be set when building the Glean library itself." + ), +) +@click.option( + "--allow-missing-files", + is_flag=True, + help=("Do not treat missing input files as an error."), +) +@click.option( + "--require-tags", + is_flag=True, + help=("Require tags to be specified for metrics and pings."), +) +@click.option( + "--expire-by-version", + help="Expire metrics by version, with the provided major version.", + type=click.INT, + required=False, +) +def translate( + input, + format, + output, + option, + allow_reserved, + allow_missing_files, + require_tags, + expire_by_version, +): + """ + Translate metrics.yaml and pings.yaml files to other formats. + """ + option_dict = {} + for opt in option: + key, val = opt.split("=", 1) + option_dict[key] = val + + sys.exit( + mod_translate.translate( + [Path(x) for x in input], + format, + Path(output), + option_dict, + { + "allow_reserved": allow_reserved, + "allow_missing_files": allow_missing_files, + "require_tags": require_tags, + "expire_by_version": expire_by_version, + }, + ) + ) + + +@click.command() +@click.option( + "--schema", + "-s", + type=str, + nargs=1, + required=True, + help=("HTTP url or file path to Glean ping schema. If remote, will cache to disk."), +) +def check(schema): + """ + Validate the contents of a Glean ping. + + The ping contents are read from stdin, and the validation errors are + written to stdout. 
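+
+    Example (illustrative; the file name is hypothetical):
+
+        cat ping.json | glean_parser check -s <schema URL or file path>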
+ """ + sys.exit( + validate_ping.validate_ping( + io.TextIOWrapper(sys.stdin.buffer, encoding="utf-8"), + io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8"), + schema_url=schema, + ) + ) + + +@click.command() +@click.argument( + "input", + type=click.Path(exists=True, dir_okay=False, file_okay=True, readable=True), + nargs=-1, +) +@click.option( + "--allow-reserved", + is_flag=True, + help=( + "If provided, allow the use of reserved fields. " + "Should only be set when building the Glean library itself." + ), +) +@click.option( + "--allow-missing-files", + is_flag=True, + help=("Do not treat missing input files as an error."), +) +@click.option( + "--require-tags", + is_flag=True, + help=("Require tags to be specified for metrics and pings."), +) +def glinter(input, allow_reserved, allow_missing_files, require_tags): + """ + Runs a linter over the metrics. + """ + sys.exit( + lint.glinter( + [Path(x) for x in input], + { + "allow_reserved": allow_reserved, + "allow_missing_files": allow_missing_files, + "require_tags": require_tags, + }, + ) + ) + + +@click.command() +@click.argument( + "input", + type=click.Path(exists=True, dir_okay=False, file_okay=True, readable=True), + nargs=-1, +) +@click.option( + "--allow-reserved", + is_flag=True, + help=( + "If provided, allow the use of reserved fields. " + "Should only be set when building the Glean library itself." + ), +) +@click.option( + "--allow-missing-files", + is_flag=True, + help=("Do not treat missing input files as an error."), +) +@click.option( + "--require-tags", + is_flag=True, + help=("Require tags to be specified for metrics and pings."), +) +def dump(input, allow_reserved, allow_missing_files, require_tags): + """ + Dump the list of metrics/pings as JSON to stdout. + """ + + results = glean_parser.parser.parse_objects( + [Path(x) for x in input], + { + "allow_reserved": allow_reserved, + "allow_missing_files": allow_missing_files, + "require_tags": require_tags, + }, + ) + errs = list(results) + assert len(errs) == 0 + + metrics = { + metric.identifier(): metric.serialize() + for category, probes in results.value.items() + for probe_name, metric in probes.items() + } + + def date_serializer(o): + if isinstance(o, datetime.datetime): + return o.isoformat() + + print( + json.dumps( + metrics, + sort_keys=True, + indent=2, + separators=(",", ": "), + default=date_serializer, + ) + ) + + +@click.command() +@click.option( + "-c", + "--coverage_file", + type=click.Path(exists=True, dir_okay=False, file_okay=True, readable=True), + required=True, + multiple=True, +) +@click.argument( + "metrics_files", + type=click.Path(exists=True, dir_okay=False, file_okay=True, readable=True), + nargs=-1, +) +@click.option( + "-o", + "--output", + type=click.Path(exists=False, dir_okay=False, file_okay=True, writable=True), + required=True, +) +@click.option( + "--format", + "-f", + type=click.Choice(list(mod_coverage.OUTPUTTERS.keys())), + required=True, +) +@click.option( + "--allow-reserved", + is_flag=True, + help=( + "If provided, allow the use of reserved fields. " + "Should only be set when building the Glean library itself." + ), +) +def coverage(coverage_file, metrics_files, format, output, allow_reserved): + """ + Produce a coverage analysis file given raw coverage output and a set of + metrics.yaml files. 
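+
+    Example (illustrative; the file names are hypothetical):
+
+        glean_parser coverage -c glean_coverage.txt -f codecovio -o coverage.json metrics.yaml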
+ """ + sys.exit( + mod_coverage.coverage( + [Path(x) for x in coverage_file], + [Path(x) for x in metrics_files], + format, + Path(output), + { + "allow_reserved": allow_reserved, + }, + ) + ) + + +@click.command() +@click.argument("bug", type=str) +@click.argument( + "metrics_files", + type=click.Path(exists=True, dir_okay=False, file_okay=True, readable=True), + nargs=-1, +) +def data_review_request(bug, metrics_files): + """ + Generate a skeleton Data Review Request for all metrics in metrics_files + whose bug_numbers fields contain the provided bug string. + For example, providing "1694739" matches + "https://bugzilla.mozilla.org/show_bug.cgi?id=1694739". + To ensure substrings don't match, the provided bug string will match only + if it is bounded by non-word characters. + Prints to stdout. + """ + sys.exit(mod_data_review.generate(bug, [Path(x) for x in metrics_files])) + + +@click.group() +@click.version_option(glean_parser.__version__, prog_name="glean_parser") +def main(args=None): + """Command line utility for glean_parser.""" + pass + + +main.add_command(translate) +main.add_command(check) +main.add_command(glinter) +main.add_command(dump) +main.add_command(coverage) +main.add_command(data_review_request, "data-review") + + +def main_wrapper(args=None): + """ + A simple wrapper around click's `main` to display the glean_parser version + when there is an error. + """ + try: + main(args=args) + except SystemExit as e: + if e.code != 0: + print( + f"ERROR running glean_parser v{glean_parser.__version__}", + file=sys.stderr, + ) + raise + + +if __name__ == "__main__": + main_wrapper() # pragma: no cover diff --git a/third_party/python/glean_parser/glean_parser/coverage.py b/third_party/python/glean_parser/glean_parser/coverage.py new file mode 100644 index 0000000000..776ea3183d --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/coverage.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Produce coverage reports from the raw information produced by the +`GLEAN_TEST_COVERAGE` feature. +""" + +import json +from .metrics import ObjectTree +from pathlib import Path +import sys +from typing import Any, Dict, List, Optional, Sequence, Set + + +from . import parser +from . import util + + +def _outputter_codecovio(metrics: ObjectTree, output_path: Path): + """ + Output coverage in codecov.io format as defined here: + + https://docs.codecov.io/docs/codecov-custom-coverage-format + + :param metrics: The tree of metrics, already annotated with coverage by + `_annotate_coverage`. + :param output_path: The file to output to. + """ + coverage: Dict[str, List] = {} + for category in metrics.values(): + for metric in category.values(): + defined_in = metric.defined_in + if defined_in is not None: + path = defined_in["filepath"] + if path not in coverage: + with open(path) as fd: + nlines = len(list(fd.readlines())) + lines = [None] * nlines + coverage[path] = lines + file_section = coverage[path] + file_section[int(defined_in["line"])] = getattr(metric, "covered", 0) + + with open(output_path, "w") as fd: + json.dump({"coverage": coverage}, fd) + + +OUTPUTTERS = {"codecovio": _outputter_codecovio} + + +def _annotate_coverage(metrics, coverage_entries): + """ + Annotate each metric with whether it is covered. Sets the attribute + `covered` to 1 on each metric that is covered. 
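+    Coverage entries that do not correspond to any known metric are ignored.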
+ """ + mapping = {} + for category in metrics.values(): + for metric in category.values(): + mapping[metric.identifier()] = metric + + for entry in coverage_entries: + metric_id = _coverage_entry_to_metric_id(entry) + if metric_id in mapping: + mapping[metric_id].covered = 1 + + +def _coverage_entry_to_metric_id(entry: str) -> str: + """ + Convert a coverage entry to a metric id. + + Technically, the coverage entries are rkv database keys, so are not just + the metric identifier. This extracts the metric identifier part out. + """ + # If getting a glean error count, report it as covering the metric the + # error occurred in, not the `glean.error.*` metric itself. + if entry.startswith("glean.error."): + entry = entry.split("/")[-1] + # If a labeled metric, strip off the label part + return entry.split("/")[0] + + +def _read_coverage_entries(coverage_reports: List[Path]) -> Set[str]: + """ + Read coverage entries from one or more files, and deduplicates them. + """ + entries = set() + + for coverage_report in coverage_reports: + with open(coverage_report) as fd: + for line in fd.readlines(): + entries.add(line.strip()) + + return entries + + +def coverage( + coverage_reports: List[Path], + metrics_files: Sequence[Path], + output_format: str, + output_file: Path, + parser_config: Optional[Dict[str, Any]] = None, + file=sys.stderr, +) -> int: + """ + Commandline helper for coverage. + + :param coverage_reports: List of coverage report files, output from the + Glean SDK when the `GLEAN_TEST_COVERAGE` environment variable is set. + :param metrics_files: List of Path objects to load metrics from. + :param output_format: The coverage output format to produce. Must be one of + `OUTPUTTERS.keys()`. + :param output_file: Path to output coverage report to. + :param parser_config: Parser configuration object, passed to + `parser.parse_objects`. + :return: Non-zero if there were any errors. + """ + + if parser_config is None: + parser_config = {} + + if output_format not in OUTPUTTERS: + raise ValueError(f"Unknown outputter {output_format}") + + metrics_files = util.ensure_list(metrics_files) + + all_objects = parser.parse_objects(metrics_files, parser_config) + + if util.report_validation_errors(all_objects): + return 1 + + entries = _read_coverage_entries(coverage_reports) + + _annotate_coverage(all_objects.value, entries) + + OUTPUTTERS[output_format](all_objects.value, output_file) + + return 0 diff --git a/third_party/python/glean_parser/glean_parser/data_review.py b/third_party/python/glean_parser/glean_parser/data_review.py new file mode 100644 index 0000000000..2267d49315 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/data_review.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Produce skeleton Data Review Requests. +""" + +from pathlib import Path +from typing import Sequence +import re + + +from . import parser +from . import util + + +def generate( + bug: str, + metrics_files: Sequence[Path], +) -> int: + """ + Commandline helper for Data Review Request template generation. + + :param bug: pattern to match in metrics' bug_numbers lists. + :param metrics_files: List of Path objects to load metrics from. + :return: Non-zero if there were any errors. + """ + + metrics_files = util.ensure_list(metrics_files) + + # Accept any value of expires. 
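+    # `custom_is_expired` and `custom_validate_expires` below override the
+    # default expiry handling, so metrics are included in the request
+    # regardless of their `expires` value.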
+ parser_options = { + "allow_reserved": True, + "custom_is_expired": lambda expires: False, + "custom_validate_expires": lambda expires: True, + } + all_objects = parser.parse_objects(metrics_files, parser_options) + + if util.report_validation_errors(all_objects): + return 1 + + # I tried [\W\Z] but it complained. So `|` it is. + reobj = re.compile(f"\\W{bug}\\W|\\W{bug}$") + durations = set() + responsible_emails = set() + filtered_metrics = list() + for metrics in all_objects.value.values(): + for metric in metrics.values(): + if not any([len(reobj.findall(bug)) == 1 for bug in metric.bugs]): + continue + + filtered_metrics.append(metric) + + durations.add(metric.expires) + + if metric.expires == "never": + responsible_emails.update(metric.notification_emails) + + if len(filtered_metrics) == 0: + print(f"I'm sorry, I couldn't find metrics matching the bug number {bug}.") + return 1 + + template = util.get_jinja2_template( + "data_review.jinja2", + filters=(("snake_case", util.snake_case),), + ) + + print( + template.render( + metrics=filtered_metrics, + durations=durations, + responsible_emails=responsible_emails, + ) + ) + + return 0 diff --git a/third_party/python/glean_parser/glean_parser/javascript.py b/third_party/python/glean_parser/glean_parser/javascript.py new file mode 100644 index 0000000000..1473065beb --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/javascript.py @@ -0,0 +1,322 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Outputter to generate Javascript code for metrics. +""" + +import enum +import json +from pathlib import Path +from typing import Any, Dict, Optional, Callable + +from . import __version__ +from . import metrics +from . import util + + +def javascript_datatypes_filter(value: util.JSONType) -> str: + """ + A Jinja2 filter that renders Javascript literals. + + Based on Python's JSONEncoder, but overrides: + - lists to use listOf + - sets to use setOf + - Rate objects to a CommonMetricData initializer + (for external Denominators' Numerators lists) + """ + + class JavascriptEncoder(json.JSONEncoder): + def iterencode(self, value): + if isinstance(value, enum.Enum): + yield from super().iterencode(util.camelize(value.name)) + elif isinstance(value, list): + yield "[" + first = True + for subvalue in value: + if not first: + yield ", " + yield from self.iterencode(subvalue) + first = False + yield "]" + elif isinstance(value, set): + yield "[" + first = True + for subvalue in sorted(list(value)): + if not first: + yield ", " + yield from self.iterencode(subvalue) + first = False + yield "]" + elif isinstance(value, metrics.Rate): + yield "CommonMetricData(" + first = True + for arg_name in util.common_metric_args: + if hasattr(value, arg_name): + if not first: + yield ", " + yield f"{util.camelize(arg_name)} = " + yield from self.iterencode(getattr(value, arg_name)) + first = False + yield ")" + else: + yield from super().iterencode(value) + + return "".join(JavascriptEncoder().iterencode(value)) + + +def class_name_factory(platform: str) -> Callable[[str], str]: + """ + Returns a function that receives an obj_type and + returns the correct class name for that type in the current platform. 
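+    When the platform is "qt", the returned name is additionally prefixed
+    with `Glean.Glean._private.`.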
+ """ + + def class_name(obj_type: str) -> str: + if obj_type == "ping": + class_name = "PingType" + else: + if obj_type.startswith("labeled_"): + obj_type = obj_type[8:] + class_name = util.Camelize(obj_type) + "MetricType" + + if platform == "qt": + return "Glean.Glean._private." + class_name + + return class_name + + return class_name + + +def extra_type_name(extra_type: str) -> str: + """ + Returns the equivalent TypeScript type to an extra type. + """ + if extra_type == "quantity": + return "number" + + return extra_type + + +def import_path(obj_type: str) -> str: + """ + Returns the import path of the given object inside the @mozilla/glean package. + """ + if obj_type == "ping": + import_path = "ping" + else: + if obj_type.startswith("labeled_"): + obj_type = obj_type[8:] + import_path = "metrics/" + obj_type + + return import_path + + +def args(obj_type: str) -> Dict[str, object]: + """ + Returns the list of arguments for each object type. + """ + if obj_type == "ping": + return {"common": util.ping_args, "extra": []} + + return {"common": util.common_metric_args, "extra": util.extra_metric_args} + + +def generate_build_date(date: Optional[str]) -> str: + """ + Generate the build Date object. + """ + + ts = util.build_date(date) + + data = [ + str(ts.year), + # In JavaScript the first month of the year in calendars is JANUARY which is 0. + # In Python it's 1-based + str(ts.month - 1), + str(ts.day), + str(ts.hour), + str(ts.minute), + str(ts.second), + ] + components = ", ".join(data) + + # DatetimeMetricType takes a `Date` instance. + return f"new Date({components})" # noqa + + +def output( + lang: str, + objs: metrics.ObjectTree, + output_dir: Path, + options: Optional[Dict[str, Any]] = None, +) -> None: + """ + Given a tree of objects, output Javascript or Typescript code to `output_dir`. + + :param lang: Either "javascript" or "typescript"; + :param objects: A tree of objects (metrics and pings) as returned from + `parser.parse_objects`. + :param output_dir: Path to an output directory to write to. + :param options: options dictionary, with the following optional keys: + - `platform`: Which platform are we building for. Options are `webext` and `qt`. + Default is `webext`. + - `version`: The version of the Glean.js Qt library being used. + This option is mandatory when targeting Qt. Note that the version + string must only contain the major and minor version i.e. 0.14. + - `with_buildinfo`: If "true" a `gleanBuildInfo.(js|ts)` file is generated. + Otherwise generation of that file is skipped. Defaults to "false". + - `build_date`: If set to `0` a static unix epoch time will be used. + If set to a ISO8601 datetime string (e.g. `2022-01-03T17:30:00`) + it will use that date. + Other values will throw an error. + If not set it will use the current date & time. + """ + + if options is None: + options = {} + + platform = options.get("platform", "webext") + accepted_platforms = ["qt", "webext", "node"] + if platform not in accepted_platforms: + raise ValueError( + f"Unknown platform: {platform}. Accepted platforms are: {accepted_platforms}." # noqa + ) + version = options.get("version") + if platform == "qt" and version is None: + raise ValueError( + "'version' option is required when building for the 'qt' platform." 
+ ) + + template = util.get_jinja2_template( + "javascript.jinja2", + filters=( + ("class_name", class_name_factory(platform)), + ("extra_type_name", extra_type_name), + ("import_path", import_path), + ("js", javascript_datatypes_filter), + ("args", args), + ), + ) + + for category_key, category_val in objs.items(): + extension = ".js" if lang == "javascript" else ".ts" + filename = util.camelize(category_key) + extension + filepath = output_dir / filename + + types = set( + [ + # This takes care of the regular metric type imports + # as well as the labeled metric subtype imports, + # thus the removal of the `labeled_` substring. + # + # The actual LabeledMetricType import is conditioned after + # the `has_labeled_metrics` boolean. + obj.type if not obj.type.startswith("labeled_") else obj.type[8:] + for obj in category_val.values() + ] + ) + has_labeled_metrics = any( + getattr(metric, "labeled", False) for metric in category_val.values() + ) + with filepath.open("w", encoding="utf-8") as fd: + fd.write( + template.render( + parser_version=__version__, + category_name=category_key, + objs=category_val, + extra_args=util.extra_args, + platform=platform, + version=version, + has_labeled_metrics=has_labeled_metrics, + types=types, + lang=lang, + ) + ) + # Jinja2 squashes the final newline, so we explicitly add it + fd.write("\n") + + with_buildinfo = options.get("with_buildinfo", "").lower() == "true" + build_date = options.get("build_date", None) + if with_buildinfo: + # Write out the special "build info" file + template = util.get_jinja2_template( + "javascript.buildinfo.jinja2", + ) + # This filename needs to start with "glean" so it can never + # clash with a metric category + filename = "gleanBuildInfo" + extension + filepath = output_dir / filename + + with filepath.open("w", encoding="utf-8") as fd: + fd.write( + template.render( + parser_version=__version__, + platform=platform, + build_date=generate_build_date(build_date), + ) + ) + fd.write("\n") + + if platform == "qt": + # Explicitly create a qmldir file when building for Qt + template = util.get_jinja2_template("qmldir.jinja2") + filepath = output_dir / "qmldir" + + with filepath.open("w", encoding="utf-8") as fd: + fd.write( + template.render( + parser_version=__version__, categories=objs.keys(), version=version + ) + ) + # Jinja2 squashes the final newline, so we explicitly add it + fd.write("\n") + + +def output_javascript( + objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]] = None +) -> None: + """ + Given a tree of objects, output Javascript code to `output_dir`. + + :param objects: A tree of objects (metrics and pings) as returned from + `parser.parse_objects`. + :param output_dir: Path to an output directory to write to. + :param options: options dictionary, with the following optional keys: + + - `namespace`: The identifier of the global variable to assign to. + This will only have and effect for Qt and static web sites. + Default is `Glean`. + - `platform`: Which platform are we building for. Options are `webext` and `qt`. + Default is `webext`. + """ + + output("javascript", objs, output_dir, options) + + +def output_typescript( + objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]] = None +) -> None: + """ + Given a tree of objects, output Typescript code to `output_dir`. + + # Note + + The only difference between the typescript and javascript templates, + currently is the file extension. 
+ + :param objects: A tree of objects (metrics and pings) as returned from + `parser.parse_objects`. + :param output_dir: Path to an output directory to write to. + :param options: options dictionary, with the following optional keys: + + - `namespace`: The identifier of the global variable to assign to. + This will only have and effect for Qt and static web sites. + Default is `Glean`. + - `platform`: Which platform are we building for. Options are `webext` and `qt`. + Default is `webext`. + """ + + output("typescript", objs, output_dir, options) diff --git a/third_party/python/glean_parser/glean_parser/kotlin.py b/third_party/python/glean_parser/glean_parser/kotlin.py new file mode 100644 index 0000000000..82cc63d237 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/kotlin.py @@ -0,0 +1,356 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Outputter to generate Kotlin code for metrics. +""" + +from collections import OrderedDict +import enum +import json +from pathlib import Path +from typing import Any, Dict, List, Optional, Union # noqa + +from . import __version__ +from . import metrics +from . import pings +from . import tags +from . import util +from .util import DictWrapper + + +def kotlin_datatypes_filter(value: util.JSONType) -> str: + """ + A Jinja2 filter that renders Kotlin literals. + + Based on Python's JSONEncoder, but overrides: + - lists to use listOf + - dicts to use mapOf + - sets to use setOf + - enums to use the like-named Kotlin enum + - Rate objects to a CommonMetricData initializer + (for external Denominators' Numerators lists) + """ + + class KotlinEncoder(json.JSONEncoder): + def iterencode(self, value): + if isinstance(value, list): + yield "listOf(" + first = True + for subvalue in value: + if not first: + yield ", " + yield from self.iterencode(subvalue) + first = False + yield ")" + elif isinstance(value, dict): + yield "mapOf(" + first = True + for key, subvalue in value.items(): + if not first: + yield ", " + yield from self.iterencode(key) + yield " to " + yield from self.iterencode(subvalue) + first = False + yield ")" + elif isinstance(value, enum.Enum): + # UniFFI generates SCREAMING_CASE enum variants. + yield (value.__class__.__name__ + "." + util.screaming_case(value.name)) + elif isinstance(value, set): + yield "setOf(" + first = True + for subvalue in sorted(list(value)): + if not first: + yield ", " + yield from self.iterencode(subvalue) + first = False + yield ")" + elif isinstance(value, metrics.Rate): + yield "CommonMetricData(" + first = True + for arg_name in util.common_metric_args: + if hasattr(value, arg_name): + if not first: + yield ", " + yield f"{util.camelize(arg_name)} = " + yield from self.iterencode(getattr(value, arg_name)) + first = False + yield ")" + else: + yield from super().iterencode(value) + + return "".join(KotlinEncoder().iterencode(value)) + + +def type_name(obj: Union[metrics.Metric, pings.Ping]) -> str: + """ + Returns the Kotlin type to use for a given metric or ping object. 
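+    Objects that generate enums (for example, events with extra keys) are
+    rendered as the metric type class parameterized over the generated enum,
+    e.g. `EventMetricType<...>`; all other objects use the plain class name
+    for their type.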
+ """ + generate_enums = getattr(obj, "_generate_enums", []) + if len(generate_enums): + generic = None + for member, suffix in generate_enums: + if len(getattr(obj, member)): + if isinstance(obj, metrics.Event): + generic = util.Camelize(obj.name) + suffix + else: + generic = util.camelize(obj.name) + suffix + else: + if isinstance(obj, metrics.Event): + generic = "NoExtras" + else: + generic = "No" + suffix + + return "{}<{}>".format(class_name(obj.type), generic) + + return class_name(obj.type) + + +def extra_type_name(typ: str) -> str: + """ + Returns the corresponding Kotlin type for event's extra key types. + """ + + if typ == "boolean": + return "Boolean" + elif typ == "string": + return "String" + elif typ == "quantity": + return "Int" + else: + return "UNSUPPORTED" + + +def class_name(obj_type: str) -> str: + """ + Returns the Kotlin class name for a given metric or ping type. + """ + if obj_type == "ping": + return "PingType" + if obj_type.startswith("labeled_"): + obj_type = obj_type[8:] + return util.Camelize(obj_type) + "MetricType" + + +def generate_build_date(date: Optional[str]) -> str: + """ + Generate the build timestamp. + """ + + ts = util.build_date(date) + + data = [ + str(ts.year), + # In Java the first month of the year in calendars is JANUARY which is 0. + # In Python it's 1-based + str(ts.month - 1), + str(ts.day), + str(ts.hour), + str(ts.minute), + str(ts.second), + ] + components = ", ".join(data) + + # DatetimeMetricType takes a `Calendar` instance. + return f'Calendar.getInstance(TimeZone.getTimeZone("GMT+0")).also {{ cal -> cal.set({components}) }}' # noqa + + +def output_gecko_lookup( + objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]] = None +) -> None: + """ + Given a tree of objects, generate a Kotlin map between Gecko histograms and + Glean SDK metric types. + + :param objects: A tree of objects (metrics and pings) as returned from + `parser.parse_objects`. + :param output_dir: Path to an output directory to write to. + :param options: options dictionary, with the following optional keys: + + - `namespace`: The package namespace to declare at the top of the + generated files. Defaults to `GleanMetrics`. + - `glean_namespace`: The package namespace of the glean library itself. + This is where glean objects will be imported from in the generated + code. + """ + if options is None: + options = {} + + template = util.get_jinja2_template( + "kotlin.geckoview.jinja2", + filters=( + ("kotlin", kotlin_datatypes_filter), + ("type_name", type_name), + ("class_name", class_name), + ), + ) + + namespace = options.get("namespace", "GleanMetrics") + glean_namespace = options.get("glean_namespace", "mozilla.components.service.glean") + + # Build a dictionary that contains data for metrics that are + # histogram-like/scalar-like and contain a gecko_datapoint, with this format: + # + # { + # "histograms": { + # "category": [ + # {"gecko_datapoint": "the-datapoint", "name": "the-metric-name"}, + # ... + # ], + # ... + # }, + # "other-type": {} + # } + gecko_metrics: Dict[str, Dict[str, List[Dict[str, str]]]] = DictWrapper() + + # Define scalar-like types. + SCALAR_LIKE_TYPES = ["boolean", "string", "quantity"] + + for category_key, category_val in objs.items(): + # Support exfiltration of Gecko metrics from products using both the + # Glean SDK and GeckoView. See bug 1566356 for more context. + for metric in category_val.values(): + # This is not a Gecko metric, skip it. 
+ if ( + isinstance(metric, pings.Ping) + or isinstance(metric, tags.Tag) + or not getattr(metric, "gecko_datapoint", False) + ): + continue + + # Put scalars in their own categories, histogram-like in "histograms" and + # categorical histograms in "categoricals". + type_category = "histograms" + if metric.type in SCALAR_LIKE_TYPES: + type_category = metric.type + elif metric.type == "labeled_counter": + # Labeled counters with a 'gecko_datapoint' property + # are categorical histograms. + type_category = "categoricals" + + gecko_metrics.setdefault(type_category, OrderedDict()) + gecko_metrics[type_category].setdefault(category_key, []) + + gecko_metrics[type_category][category_key].append( + {"gecko_datapoint": metric.gecko_datapoint, "name": metric.name} + ) + + if not gecko_metrics: + # Bail out and don't create a file if no gecko metrics + # are found. + return + + filepath = output_dir / "GleanGeckoMetricsMapping.kt" + with filepath.open("w", encoding="utf-8") as fd: + fd.write( + template.render( + parser_version=__version__, + gecko_metrics=gecko_metrics, + namespace=namespace, + glean_namespace=glean_namespace, + ) + ) + # Jinja2 squashes the final newline, so we explicitly add it + fd.write("\n") + + +def output_kotlin( + objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]] = None +) -> None: + """ + Given a tree of objects, output Kotlin code to `output_dir`. + + :param objects: A tree of objects (metrics and pings) as returned from + `parser.parse_objects`. + :param output_dir: Path to an output directory to write to. + :param options: options dictionary, with the following optional keys: + + - `namespace`: The package namespace to declare at the top of the + generated files. Defaults to `GleanMetrics`. + - `glean_namespace`: The package namespace of the glean library itself. + This is where glean objects will be imported from in the generated + code. + - `with_buildinfo`: If "true" a `GleanBuildInfo.kt` file is generated. + Otherwise generation of that file is skipped. + Defaults to "true". + - `build_date`: If set to `0` a static unix epoch time will be used. + If set to a ISO8601 datetime string (e.g. `2022-01-03T17:30:00`) + it will use that date. + Other values will throw an error. + If not set it will use the current date & time. 
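+
+    When generation is driven by the `translate` command, these options can
+    be supplied as `-s key=value` pairs (for example, hypothetically,
+    `-s namespace=org.example.GleanMetrics`).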
+ """ + if options is None: + options = {} + + namespace = options.get("namespace", "GleanMetrics") + glean_namespace = options.get("glean_namespace", "mozilla.components.service.glean") + namespace_package = namespace[: namespace.rfind(".")] + with_buildinfo = options.get("with_buildinfo", "true").lower() == "true" + build_date = options.get("build_date", None) + + # Write out the special "build info" object + template = util.get_jinja2_template( + "kotlin.buildinfo.jinja2", + ) + + if with_buildinfo: + build_date = generate_build_date(build_date) + # This filename needs to start with "Glean" so it can never clash with a + # metric category + with (output_dir / "GleanBuildInfo.kt").open("w", encoding="utf-8") as fd: + fd.write( + template.render( + parser_version=__version__, + namespace=namespace, + namespace_package=namespace_package, + glean_namespace=glean_namespace, + build_date=build_date, + ) + ) + fd.write("\n") + + template = util.get_jinja2_template( + "kotlin.jinja2", + filters=( + ("kotlin", kotlin_datatypes_filter), + ("type_name", type_name), + ("extra_type_name", extra_type_name), + ("class_name", class_name), + ), + ) + + for category_key, category_val in objs.items(): + filename = util.Camelize(category_key) + ".kt" + filepath = output_dir / filename + + obj_types = sorted( + list(set(class_name(obj.type) for obj in category_val.values())) + ) + has_labeled_metrics = any( + getattr(metric, "labeled", False) for metric in category_val.values() + ) + + with filepath.open("w", encoding="utf-8") as fd: + fd.write( + template.render( + parser_version=__version__, + category_name=category_key, + objs=category_val, + obj_types=obj_types, + common_metric_args=util.common_metric_args, + extra_metric_args=util.extra_metric_args, + ping_args=util.ping_args, + namespace=namespace, + has_labeled_metrics=has_labeled_metrics, + glean_namespace=glean_namespace, + ) + ) + # Jinja2 squashes the final newline, so we explicitly add it + fd.write("\n") + + # TODO: Maybe this should just be a separate outputter? + output_gecko_lookup(objs, output_dir, options) diff --git a/third_party/python/glean_parser/glean_parser/lint.py b/third_party/python/glean_parser/glean_parser/lint.py new file mode 100644 index 0000000000..0dc2bddd5d --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/lint.py @@ -0,0 +1,538 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import enum +from pathlib import Path +import re +import sys +from typing import ( + Any, + Callable, + Dict, + Generator, + List, + Iterable, + Optional, + Tuple, + Union, +) # noqa + + +from . import metrics +from . import parser +from . import pings +from . import tags +from . import util + + +LintGenerator = Generator[str, None, None] + + +class CheckType(enum.Enum): + warning = 0 + error = 1 + + +def _split_words(name: str) -> List[str]: + """ + Helper function to split words on either `.` or `_`. + """ + return re.split("[._-]", name) + + +def _english_list(items: List[str]) -> str: + """ + Helper function to format a list [A, B, C] as "'A', 'B', or 'C'". 
+ """ + if len(items) == 0: + return "" + elif len(items) == 1: + return f"'{items[0]}'" + else: + return "{}, or '{}'".format( + ", ".join([f"'{x}'" for x in items[:-1]]), items[-1] + ) + + +def _hamming_distance(str1: str, str2: str) -> int: + """ + Count the # of differences between strings str1 and str2, + padding the shorter one with whitespace + """ + + diffs = 0 + if len(str1) < len(str2): + str1, str2 = str2, str1 + len_dist = len(str1) - len(str2) + str2 += " " * len_dist + + for ch1, ch2 in zip(str1, str2): + if ch1 != ch2: + diffs += 1 + return diffs + + +def check_common_prefix( + category_name: str, metrics: Iterable[metrics.Metric] +) -> LintGenerator: + """ + Check if all metrics begin with a common prefix. + """ + metric_words = sorted([_split_words(metric.name) for metric in metrics]) + + if len(metric_words) < 2: + return + + first = metric_words[0] + last = metric_words[-1] + + for i in range(min(len(first), len(last))): + if first[i] != last[i]: + break + + if i > 0: + common_prefix = "_".join(first[:i]) + yield ( + f"Within category '{category_name}', all metrics begin with " + f"prefix '{common_prefix}'." + "Remove the prefixes on the metric names and (possibly) " + "rename the category." + ) + + +def check_unit_in_name( + metric: metrics.Metric, parser_config: Dict[str, Any] +) -> LintGenerator: + """ + The metric name ends in a unit. + """ + TIME_UNIT_ABBREV = { + "nanosecond": "ns", + "microsecond": "us", + "millisecond": "ms", + "second": "s", + "minute": "m", + "hour": "h", + "day": "d", + } + + MEMORY_UNIT_ABBREV = { + "byte": "b", + "kilobyte": "kb", + "megabyte": "mb", + "gigabyte": "gb", + } + + name_words = _split_words(metric.name) + unit_in_name = name_words[-1] + + time_unit = getattr(metric, "time_unit", None) + memory_unit = getattr(metric, "memory_unit", None) + unit = getattr(metric, "unit", None) + + if time_unit is not None: + if ( + unit_in_name == TIME_UNIT_ABBREV.get(time_unit.name) + or unit_in_name == time_unit.name + ): + yield ( + f"Suffix '{unit_in_name}' is redundant with time_unit " + f"'{time_unit.name}'. Only include time_unit." + ) + elif ( + unit_in_name in TIME_UNIT_ABBREV.keys() + or unit_in_name in TIME_UNIT_ABBREV.values() + ): + yield ( + f"Suffix '{unit_in_name}' doesn't match time_unit " + f"'{time_unit.name}'. " + "Confirm the unit is correct and only include time_unit." + ) + + elif memory_unit is not None: + if ( + unit_in_name == MEMORY_UNIT_ABBREV.get(memory_unit.name) + or unit_in_name == memory_unit.name + ): + yield ( + f"Suffix '{unit_in_name}' is redundant with memory_unit " + f"'{memory_unit.name}'. " + "Only include memory_unit." + ) + elif ( + unit_in_name in MEMORY_UNIT_ABBREV.keys() + or unit_in_name in MEMORY_UNIT_ABBREV.values() + ): + yield ( + f"Suffix '{unit_in_name}' doesn't match memory_unit " + f"{memory_unit.name}'. " + "Confirm the unit is correct and only include memory_unit." + ) + + elif unit is not None: + if unit_in_name == unit: + yield ( + f"Suffix '{unit_in_name}' is redundant with unit param " + f"'{unit}'. " + "Only include unit." + ) + + +def check_category_generic( + category_name: str, metrics: Iterable[metrics.Metric] +) -> LintGenerator: + """ + The category name is too generic. + """ + GENERIC_CATEGORIES = ["metrics", "events"] + + if category_name in GENERIC_CATEGORIES: + yield ( + f"Category '{category_name}' is too generic. 
" + f"Don't use {_english_list(GENERIC_CATEGORIES)} for category names" + ) + + +def check_bug_number( + metric: Union[metrics.Metric, pings.Ping], parser_config: Dict[str, Any] +) -> LintGenerator: + number_bugs = [str(bug) for bug in metric.bugs if isinstance(bug, int)] + + if len(number_bugs): + yield ( + f"For bugs {', '.join(number_bugs)}: " + "Bug numbers are deprecated and should be changed to full URLs. " + f"For example, use 'http://bugzilla.mozilla.org/{number_bugs[0]}' " + f"instead of '{number_bugs[0]}'." + ) + + +def check_valid_in_baseline( + metric: metrics.Metric, parser_config: Dict[str, Any] +) -> LintGenerator: + allow_reserved = parser_config.get("allow_reserved", False) + + if not allow_reserved and "baseline" in metric.send_in_pings: + yield ( + "The baseline ping is Glean-internal. " + "Remove 'baseline' from the send_in_pings array." + ) + + +def check_misspelled_pings( + metric: metrics.Metric, parser_config: Dict[str, Any] +) -> LintGenerator: + for ping in metric.send_in_pings: + for builtin in pings.RESERVED_PING_NAMES: + distance = _hamming_distance(ping, builtin) + if distance == 1: + yield f"Ping '{ping}' seems misspelled. Did you mean '{builtin}'?" + + +def check_tags_required( + metric_or_ping: Union[metrics.Metric, pings.Ping], parser_config: Dict[str, Any] +) -> LintGenerator: + if parser_config.get("require_tags", False) and not len( + metric_or_ping.metadata.get("tags", []) + ): + yield "Tags are required but no tags specified" + + +def check_user_lifetime_expiration( + metric: metrics.Metric, parser_config: Dict[str, Any] +) -> LintGenerator: + if metric.lifetime == metrics.Lifetime.user and metric.expires != "never": + yield ( + "Metrics with 'user' lifetime cannot have an expiration date. " + "They live as long as the user profile does. " + "Set expires to 'never'." + ) + + +def check_expired_date( + metric: metrics.Metric, parser_config: Dict[str, Any] +) -> LintGenerator: + try: + metric.validate_expires() + except ValueError as e: + yield (str(e)) + + +def check_expired_metric( + metric: metrics.Metric, parser_config: Dict[str, Any] +) -> LintGenerator: + if metric.is_expired(): + yield ("Metric has expired. Please consider removing it.") + + +def check_old_event_api( + metric: metrics.Metric, parser_config: Dict[str, Any] +) -> LintGenerator: + # Glean v52.0.0 removed the old events API. + # The metrics-2-0-0 schema still supports it. + # We want to warn about it. + # This can go when we introduce 3-0-0 + + if not isinstance(metric, metrics.Event): + return + + if not all("type" in x for x in metric.extra_keys.values()): + yield ("The old event API is gone. 
Extra keys require a type.") + + +def check_redundant_ping( + pings: pings.Ping, parser_config: Dict[str, Any] +) -> LintGenerator: + """ + Check if the pings contains 'ping' as the prefix or suffix, or 'ping' or 'custom' + """ + ping_words = _split_words(pings.name) + + if len(ping_words) != 0: + ping_first_word = ping_words[0] + ping_last_word = ping_words[-1] + + if ping_first_word == "ping": + yield ("The prefix 'ping' is redundant.") + elif ping_last_word == "ping": + yield ("The suffix 'ping' is redundant.") + elif "ping" in ping_words: + yield ("The word 'ping' is redundant.") + elif "custom" in ping_words: + yield ("The word 'custom' is redundant.") + + +# The checks that operate on an entire category of metrics: +# {NAME: (function, is_error)} +CATEGORY_CHECKS: Dict[ + str, Tuple[Callable[[str, Iterable[metrics.Metric]], LintGenerator], CheckType] +] = { + "COMMON_PREFIX": (check_common_prefix, CheckType.error), + "CATEGORY_GENERIC": (check_category_generic, CheckType.error), +} + + +# The checks that operate on individual metrics: +# {NAME: (function, is_error)} +METRIC_CHECKS: Dict[ + str, Tuple[Callable[[metrics.Metric, dict], LintGenerator], CheckType] +] = { + "UNIT_IN_NAME": (check_unit_in_name, CheckType.error), + "BUG_NUMBER": (check_bug_number, CheckType.error), + "BASELINE_PING": (check_valid_in_baseline, CheckType.error), + "MISSPELLED_PING": (check_misspelled_pings, CheckType.error), + "TAGS_REQUIRED": (check_tags_required, CheckType.error), + "EXPIRATION_DATE_TOO_FAR": (check_expired_date, CheckType.warning), + "USER_LIFETIME_EXPIRATION": (check_user_lifetime_expiration, CheckType.warning), + "EXPIRED": (check_expired_metric, CheckType.warning), + "OLD_EVENT_API": (check_old_event_api, CheckType.warning), +} + + +# The checks that operate on individual pings: +# {NAME: (function, is_error)} +PING_CHECKS: Dict[ + str, Tuple[Callable[[pings.Ping, dict], LintGenerator], CheckType] +] = { + "BUG_NUMBER": (check_bug_number, CheckType.error), + "TAGS_REQUIRED": (check_tags_required, CheckType.error), + "REDUNDANT_PING": (check_redundant_ping, CheckType.error), +} + + +class GlinterNit: + def __init__(self, check_name: str, name: str, msg: str, check_type: CheckType): + self.check_name = check_name + self.name = name + self.msg = msg + self.check_type = check_type + + def format(self): + return ( + f"{self.check_type.name.upper()}: {self.check_name}: " + f"{self.name}: {self.msg}" + ) + + +def _lint_item_tags( + item_name: str, + item_type: str, + item_tag_names: List[str], + valid_tag_names: List[str], +) -> List[GlinterNit]: + invalid_tags = [tag for tag in item_tag_names if tag not in valid_tag_names] + return ( + [ + GlinterNit( + "INVALID_TAGS", + item_name, + f"Invalid tags specified in {item_type}: {', '.join(invalid_tags)}", + CheckType.error, + ) + ] + if len(invalid_tags) + else [] + ) + + +def _lint_pings( + category: Dict[str, Union[metrics.Metric, pings.Ping, tags.Tag]], + parser_config: Dict[str, Any], + valid_tag_names: List[str], +) -> List[GlinterNit]: + nits: List[GlinterNit] = [] + + for ping_name, ping in sorted(list(category.items())): + assert isinstance(ping, pings.Ping) + for check_name, (check_func, check_type) in PING_CHECKS.items(): + new_nits = list(check_func(ping, parser_config)) + if len(new_nits): + if check_name not in ping.no_lint: + nits.extend( + GlinterNit( + check_name, + ping_name, + msg, + check_type, + ) + for msg in new_nits + ) + nits.extend( + _lint_item_tags( + ping_name, + "ping", + ping.metadata.get("tags", []), + 
valid_tag_names, + ) + ) + return nits + + +def lint_metrics( + objs: metrics.ObjectTree, + parser_config: Optional[Dict[str, Any]] = None, + file=sys.stderr, +) -> List[GlinterNit]: + """ + Performs glinter checks on a set of metrics objects. + + :param objs: Tree of metric objects, as returns by `parser.parse_objects`. + :param file: The stream to write errors to. + :returns: List of nits. + """ + if parser_config is None: + parser_config = {} + + nits: List[GlinterNit] = [] + valid_tag_names = [tag for tag in objs.get("tags", [])] + for category_name, category in sorted(list(objs.items())): + if category_name == "pings": + nits.extend(_lint_pings(category, parser_config, valid_tag_names)) + continue + + if category_name == "tags": + # currently we have no linting for tags + continue + + # Make sure the category has only Metrics, not Pings or Tags + category_metrics = dict( + (name, metric) + for (name, metric) in category.items() + if isinstance(metric, metrics.Metric) + ) + + for cat_check_name, (cat_check_func, check_type) in CATEGORY_CHECKS.items(): + if any( + cat_check_name in metric.no_lint for metric in category_metrics.values() + ): + continue + nits.extend( + GlinterNit(cat_check_name, category_name, msg, check_type) + for msg in cat_check_func(category_name, category_metrics.values()) + ) + + for _metric_name, metric in sorted(list(category_metrics.items())): + for check_name, (check_func, check_type) in METRIC_CHECKS.items(): + new_nits = list(check_func(metric, parser_config)) + if len(new_nits): + if check_name not in metric.no_lint: + nits.extend( + GlinterNit( + check_name, + ".".join([metric.category, metric.name]), + msg, + check_type, + ) + for msg in new_nits + ) + + # also check that tags for metric are valid + nits.extend( + _lint_item_tags( + ".".join([metric.category, metric.name]), + "metric", + metric.metadata.get("tags", []), + valid_tag_names, + ) + ) + + if len(nits): + print("Sorry, Glean found some glinter nits:", file=file) + for nit in nits: + print(nit.format(), file=file) + print("", file=file) + print("Please fix the above nits to continue.", file=file) + print( + "To disable a check, add a `no_lint` parameter " + "with a list of check names to disable.\n" + "This parameter can appear with each individual metric, or at the " + "top-level to affect the entire file.", + file=file, + ) + + return nits + + +def lint_yaml_files( + input_filepaths: Iterable[Path], + file=sys.stderr, + parser_config: Optional[Dict[str, Any]] = None, +) -> List: + """Always empty.""" + return [] + + +def glinter( + input_filepaths: Iterable[Path], + parser_config: Optional[Dict[str, Any]] = None, + file=sys.stderr, +) -> int: + """ + Commandline helper for glinter. + + :param input_filepaths: List of Path objects to load metrics from. + :param parser_config: Parser configuration object, passed to + `parser.parse_objects`. + :param file: The stream to write the errors to. + :return: Non-zero if there were any glinter errors. + """ + if parser_config is None: + parser_config = {} + + errors = 0 + + objs = parser.parse_objects(input_filepaths, parser_config) + errors += util.report_validation_errors(objs) + + nits = lint_metrics(objs.value, parser_config=parser_config, file=file) + errors += len([nit for nit in nits if nit.check_type == CheckType.error]) + + if errors == 0: + print("✨ Your metrics are Glean! 
✨", file=file) + return 0 + + print(f"❌ Found {errors} errors.") + + return 1 diff --git a/third_party/python/glean_parser/glean_parser/markdown.py b/third_party/python/glean_parser/glean_parser/markdown.py new file mode 100644 index 0000000000..68b288945f --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/markdown.py @@ -0,0 +1,273 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Outputter to generate Markdown documentation for metrics. +""" + +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union +from urllib.parse import urlsplit, parse_qs + + +from . import __version__ +from . import metrics +from . import pings +from . import util +from collections import defaultdict + + +def extra_info(obj: Union[metrics.Metric, pings.Ping]) -> List[Tuple[str, str]]: + """ + Returns a list of string to string tuples with extra information for the type + (e.g. extra keys for events) or an empty list if nothing is available. + """ + extra_info = [] + + if isinstance(obj, metrics.Event): + for key in obj.allowed_extra_keys: + extra_info.append((key, obj.extra_keys[key]["description"])) + + if isinstance(obj, metrics.Labeled) and obj.ordered_labels is not None: + for label in obj.ordered_labels: + extra_info.append((label, None)) + + if isinstance(obj, metrics.Quantity): + extra_info.append(("unit", obj.unit)) + + return extra_info + + +def ping_desc( + ping_name: str, custom_pings_cache: Optional[Dict[str, pings.Ping]] = None +) -> str: + """ + Return a text description of the ping. If a custom_pings_cache + is available, look in there for non-reserved ping names description. + """ + desc = "" + + if ping_name in pings.RESERVED_PING_NAMES: + desc = ( + "This is a built-in ping that is assembled out of the " + "box by the Glean SDK." + ) + elif ping_name == "all-pings": + desc = "These metrics are sent in every ping." + elif custom_pings_cache is not None and ping_name in custom_pings_cache: + desc = custom_pings_cache[ping_name].description + + return desc + + +def metrics_docs(obj_name: str) -> str: + """ + Return a link to the documentation entry for the Glean SDK metric of the + requested type. + """ + # We need to fixup labeled stuff, as types are singular and docs refer + # to them as plural. + fixedup_name = obj_name + if obj_name.startswith("labeled_"): + fixedup_name += "s" + + return f"https://mozilla.github.io/glean/book/user/metrics/{fixedup_name}.html" + + +def ping_docs(ping_name: str) -> str: + """ + Return a link to the documentation entry for the requested Glean SDK + built-in ping. + """ + if ping_name not in pings.RESERVED_PING_NAMES: + return "" + + return f"https://mozilla.github.io/glean/book/user/pings/{ping_name}.html" + + +def if_empty( + ping_name: str, custom_pings_cache: Optional[Dict[str, pings.Ping]] = None +) -> bool: + if custom_pings_cache is not None and ping_name in custom_pings_cache: + return custom_pings_cache[ping_name].send_if_empty + else: + return False + + +def ping_reasons( + ping_name: str, custom_pings_cache: Dict[str, pings.Ping] +) -> Dict[str, str]: + """ + Returns the reasons dictionary for the ping. 
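+    An empty dictionary is returned for "all-pings" and for ping names that
+    are not in the cache.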
+ """ + if ping_name == "all-pings": + return {} + elif ping_name in custom_pings_cache: + return custom_pings_cache[ping_name].reasons + + return {} + + +def ping_data_reviews( + ping_name: str, custom_pings_cache: Optional[Dict[str, pings.Ping]] = None +) -> Optional[List[str]]: + if custom_pings_cache is not None and ping_name in custom_pings_cache: + return custom_pings_cache[ping_name].data_reviews + else: + return None + + +def ping_review_title(data_url: str, index: int) -> str: + """ + Return a title for a data review in human readable form. + + :param data_url: A url for data review. + :param index: Position of the data review on list (e.g: 1, 2, 3...). + """ + url_object = urlsplit(data_url) + + # Bugzilla urls like `https://bugzilla.mozilla.org/show_bug.cgi?id=1581647` + query = url_object.query + params = parse_qs(query) + + # GitHub urls like `https://github.com/mozilla-mobile/fenix/pull/1707` + path = url_object.path + short_url = path[1:].replace("/pull/", "#") + + if params and params["id"]: + return f"Bug {params['id'][0]}" + elif url_object.netloc == "github.com": + return short_url + + return f"Review {index}" + + +def ping_bugs( + ping_name: str, custom_pings_cache: Optional[Dict[str, pings.Ping]] = None +) -> Optional[List[str]]: + if custom_pings_cache is not None and ping_name in custom_pings_cache: + return custom_pings_cache[ping_name].bugs + else: + return None + + +def ping_include_client_id( + ping_name: str, custom_pings_cache: Optional[Dict[str, pings.Ping]] = None +) -> bool: + if custom_pings_cache is not None and ping_name in custom_pings_cache: + return custom_pings_cache[ping_name].include_client_id + else: + return False + + +def data_sensitivity_numbers( + data_sensitivity: Optional[List[metrics.DataSensitivity]], +) -> str: + if data_sensitivity is None: + return "unknown" + else: + return ", ".join(str(x.value) for x in data_sensitivity) + + +def output_markdown( + objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]] = None +) -> None: + """ + Given a tree of objects, output Markdown docs to `output_dir`. + + This produces a single `metrics.md`. The file contains a table of + contents and a section for each ping metrics are collected for. + + :param objects: A tree of objects (metrics and pings) as returned from + `parser.parse_objects`. + :param output_dir: Path to an output directory to write to. + :param options: options dictionary, with the following optional key: + - `project_title`: The projects title. + """ + if options is None: + options = {} + + # Build a dictionary that associates pings with their metrics. + # + # { + # "baseline": [ + # { ... metric data ... }, + # ... + # ], + # "metrics": [ + # { ... metric data ... }, + # ... + # ], + # ... + # } + # + # This also builds a dictionary of custom pings, if available. + custom_pings_cache: Dict[str, pings.Ping] = defaultdict() + metrics_by_pings: Dict[str, List[metrics.Metric]] = defaultdict(list) + for _category_key, category_val in objs.items(): + for obj in category_val.values(): + # Filter out custom pings. We will need them for extracting + # the description + if isinstance(obj, pings.Ping): + custom_pings_cache[obj.name] = obj + # Pings that have `send_if_empty` set to true, + # might not have any metrics. They need to at least have an + # empty array of metrics to show up on the template. + if obj.send_if_empty and not metrics_by_pings[obj.name]: + metrics_by_pings[obj.name] = [] + + # If this is an internal Glean metric, and we don't + # want docs for it. 
+ if isinstance(obj, metrics.Metric) and not obj.is_internal_metric(): + # If we get here, obj is definitely a metric we want + # docs for. + for ping_name in obj.send_in_pings: + metrics_by_pings[ping_name].append(obj) + + # Sort the metrics by their identifier, to make them show up nicely + # in the docs and to make generated docs reproducible. + for ping_name in metrics_by_pings: + metrics_by_pings[ping_name] = sorted( + metrics_by_pings[ping_name], key=lambda x: x.identifier() + ) + + project_title = options.get("project_title", "this project") + introduction_extra = options.get("introduction_extra") + + template = util.get_jinja2_template( + "markdown.jinja2", + filters=( + ("extra_info", extra_info), + ("metrics_docs", metrics_docs), + ("ping_desc", lambda x: ping_desc(x, custom_pings_cache)), + ("ping_send_if_empty", lambda x: if_empty(x, custom_pings_cache)), + ("ping_docs", ping_docs), + ("ping_reasons", lambda x: ping_reasons(x, custom_pings_cache)), + ("ping_data_reviews", lambda x: ping_data_reviews(x, custom_pings_cache)), + ("ping_review_title", ping_review_title), + ("ping_bugs", lambda x: ping_bugs(x, custom_pings_cache)), + ( + "ping_include_client_id", + lambda x: ping_include_client_id(x, custom_pings_cache), + ), + ("data_sensitivity_numbers", data_sensitivity_numbers), + ), + ) + + filename = "metrics.md" + filepath = output_dir / filename + + with filepath.open("w", encoding="utf-8") as fd: + fd.write( + template.render( + parser_version=__version__, + metrics_by_pings=metrics_by_pings, + project_title=project_title, + introduction_extra=introduction_extra, + ) + ) + # Jinja2 squashes the final newline, so we explicitly add it + fd.write("\n") diff --git a/third_party/python/glean_parser/glean_parser/metrics.py b/third_party/python/glean_parser/glean_parser/metrics.py new file mode 100644 index 0000000000..6398938997 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/metrics.py @@ -0,0 +1,435 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Classes for each of the high-level metric types. +""" + +import enum +from typing import Any, Dict, List, Optional, Type, Union # noqa + + +from . import pings +from . import tags +from . import util + + +# Important: if the values are ever changing here, make sure +# to also fix mozilla/glean. Otherwise language bindings may +# break there. 
+class Lifetime(enum.Enum): + ping = 0 + application = 1 + user = 2 + + +class DataSensitivity(enum.Enum): + technical = 1 + interaction = 2 + web_activity = 3 + highly_sensitive = 4 + + +class Metric: + typename: str = "ERROR" + glean_internal_metric_cat: str = "glean.internal.metrics" + metric_types: Dict[str, Any] = {} + default_store_names: List[str] = ["metrics"] + + def __init__( + self, + type: str, + category: str, + name: str, + bugs: List[str], + description: str, + notification_emails: List[str], + expires: Any, + metadata: Optional[Dict] = None, + data_reviews: Optional[List[str]] = None, + version: int = 0, + disabled: bool = False, + lifetime: str = "ping", + send_in_pings: Optional[List[str]] = None, + unit: Optional[str] = None, + gecko_datapoint: str = "", + no_lint: Optional[List[str]] = None, + data_sensitivity: Optional[List[str]] = None, + defined_in: Optional[Dict] = None, + telemetry_mirror: Optional[str] = None, + _config: Optional[Dict[str, Any]] = None, + _validated: bool = False, + ): + # Avoid cyclical import + from . import parser + + self.type = type + self.category = category + self.name = name + self.bugs = bugs + self.description = description + self.notification_emails = notification_emails + self.expires = expires + if metadata is None: + metadata = {} + self.metadata = metadata + if data_reviews is None: + data_reviews = [] + self.data_reviews = data_reviews + self.version = version + self.disabled = disabled + self.lifetime = getattr(Lifetime, lifetime) + if send_in_pings is None: + send_in_pings = ["default"] + self.send_in_pings = send_in_pings + if unit is not None: + self.unit = unit + self.gecko_datapoint = gecko_datapoint + if no_lint is None: + no_lint = [] + self.no_lint = no_lint + if data_sensitivity is not None: + self.data_sensitivity = [ + getattr(DataSensitivity, x) for x in data_sensitivity + ] + self.defined_in = defined_in + if telemetry_mirror is not None: + self.telemetry_mirror = telemetry_mirror + + # _validated indicates whether this metric has already been jsonschema + # validated (but not any of the Python-level validation). + if not _validated: + data = { + "$schema": parser.METRICS_ID, + self.category: {self.name: self._serialize_input()}, + } # type: Dict[str, util.JSONType] + for error in parser.validate(data): + raise ValueError(error) + + # Store the config, but only after validation. + if _config is None: + _config = {} + self._config = _config + + # Metrics in the special category "glean.internal.metrics" need to have + # an empty category string when identifying the metrics in the ping. + if self.category == Metric.glean_internal_metric_cat: + self.category = "" + + def __init_subclass__(cls, **kwargs): + # Create a mapping of all of the subclasses of this class + if cls not in Metric.metric_types and hasattr(cls, "typename"): + Metric.metric_types[cls.typename] = cls + super().__init_subclass__(**kwargs) + + @classmethod + def make_metric( + cls, + category: str, + name: str, + metric_info: Dict[str, util.JSONType], + config: Optional[Dict[str, Any]] = None, + validated: bool = False, + ): + """ + Given a metric_info dictionary from metrics.yaml, return a metric + instance. 
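A minimal sketch of what a `make_metric` call looks like, assuming `glean_parser` is importable; the category, metric name, bug URL and email address are purely illustrative, and jsonschema validation is skipped via `validated=True`:

```python
from glean_parser.metrics import Metric

# A hypothetical counter, expressed the way a metrics.yaml entry is parsed.
metric = Metric.make_metric(
    category="example.category",
    name="example_count",
    metric_info={
        "type": "counter",
        "description": "A purely illustrative counter.",
        "bugs": ["https://bugzilla.mozilla.org/show_bug.cgi?id=1234567"],
        "data_reviews": ["https://example.com/data-review"],
        "notification_emails": ["nobody@example.com"],
        "expires": "never",
    },
    validated=True,  # skip jsonschema validation for this sketch
)

assert metric.typename == "counter"
assert metric.identifier() == "example.category.example_count"
assert metric.send_in_pings == ["default"]  # expanded later by the parser
```

Dispatch works through the `metric_types` registry that `__init_subclass__` populates, so any `Metric` subclass that declares a `typename` can be constructed this way.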
+ + :param: category The category the metric lives in + :param: name The name of the metric + :param: metric_info A dictionary of the remaining metric parameters + :param: config A dictionary containing commandline configuration + parameters + :param: validated True if the metric has already gone through + jsonschema validation + :return: A new Metric instance. + """ + if config is None: + config = {} + + metric_type = metric_info["type"] + if not isinstance(metric_type, str): + raise TypeError(f"Unknown metric type {metric_type}") + return cls.metric_types[metric_type]( + category=category, + name=name, + defined_in=getattr(metric_info, "defined_in", None), + _validated=validated, + _config=config, + **metric_info, + ) + + def serialize(self) -> Dict[str, util.JSONType]: + """ + Serialize the metric back to JSON object model. + """ + d = self.__dict__.copy() + # Convert enum fields back to strings + for key, val in d.items(): + if isinstance(val, enum.Enum): + d[key] = d[key].name + if isinstance(val, set): + d[key] = sorted(list(val)) + if isinstance(val, list) and len(val) and isinstance(val[0], enum.Enum): + d[key] = [x.name for x in val] + del d["name"] + del d["category"] + d.pop("_config", None) + d.pop("_generate_enums", None) + return d + + def _serialize_input(self) -> Dict[str, util.JSONType]: + d = self.serialize() + modified_dict = util.remove_output_params(d, "defined_in") + return modified_dict + + def identifier(self) -> str: + """ + Create an identifier unique for this metric. + Generally, category.name; however, Glean internal + metrics only use name. + """ + if not self.category: + return self.name + return ".".join((self.category, self.name)) + + def is_disabled(self) -> bool: + return self.disabled or self.is_expired() + + def is_expired(self) -> bool: + def default_handler(expires) -> bool: + return util.is_expired(expires, self._config.get("expire_by_version")) + + return self._config.get("custom_is_expired", default_handler)(self.expires) + + def validate_expires(self): + def default_handler(expires): + return util.validate_expires(expires, self._config.get("expire_by_version")) + + return self._config.get("custom_validate_expires", default_handler)( + self.expires + ) + + def is_internal_metric(self) -> bool: + return self.category in (Metric.glean_internal_metric_cat, "") + + +class Boolean(Metric): + typename = "boolean" + + +class String(Metric): + typename = "string" + + +class StringList(Metric): + typename = "string_list" + + +class Counter(Metric): + typename = "counter" + + +class Quantity(Metric): + typename = "quantity" + + +class TimeUnit(enum.Enum): + nanosecond = 0 + microsecond = 1 + millisecond = 2 + second = 3 + minute = 4 + hour = 5 + day = 6 + + +class TimeBase(Metric): + def __init__(self, *args, **kwargs): + self.time_unit = getattr(TimeUnit, kwargs.pop("time_unit", "millisecond")) + super().__init__(*args, **kwargs) + + +class Timespan(TimeBase): + typename = "timespan" + + +class TimingDistribution(TimeBase): + typename = "timing_distribution" + + def __init__(self, *args, **kwargs): + self.time_unit = getattr(TimeUnit, kwargs.pop("time_unit", "nanosecond")) + Metric.__init__(self, *args, **kwargs) + + +class MemoryUnit(enum.Enum): + byte = 0 + kilobyte = 1 + megabyte = 2 + gigabyte = 3 + + +class MemoryDistribution(Metric): + typename = "memory_distribution" + + def __init__(self, *args, **kwargs): + self.memory_unit = getattr(MemoryUnit, kwargs.pop("memory_unit", "byte")) + super().__init__(*args, **kwargs) + + +class 
HistogramType(enum.Enum): + linear = 0 + exponential = 1 + + +class CustomDistribution(Metric): + typename = "custom_distribution" + + def __init__(self, *args, **kwargs): + self.range_min = kwargs.pop("range_min", 1) + self.range_max = kwargs.pop("range_max") + self.bucket_count = kwargs.pop("bucket_count") + self.histogram_type = getattr( + HistogramType, kwargs.pop("histogram_type", "exponential") + ) + super().__init__(*args, **kwargs) + + +class Datetime(TimeBase): + typename = "datetime" + + +class Event(Metric): + typename = "event" + + default_store_names = ["events"] + + def __init__(self, *args, **kwargs): + self.extra_keys = kwargs.pop("extra_keys", {}) + self.validate_extra_keys(self.extra_keys, kwargs.get("_config", {})) + super().__init__(*args, **kwargs) + self._generate_enums = [("allowed_extra_keys_with_types", "Extra")] + + @property + def allowed_extra_keys(self): + # Sort keys so that output is deterministic + return sorted(list(self.extra_keys.keys())) + + @property + def allowed_extra_keys_with_types(self): + # Sort keys so that output is deterministic + return sorted( + [(k, v.get("type", "string")) for (k, v) in self.extra_keys.items()], + key=lambda x: x[0], + ) + + @staticmethod + def validate_extra_keys(extra_keys: Dict[str, str], config: Dict[str, Any]) -> None: + if not config.get("allow_reserved") and any( + k.startswith("glean.") for k in extra_keys.keys() + ): + raise ValueError( + "Extra keys beginning with 'glean.' are reserved for " + "Glean internal use." + ) + + +class Uuid(Metric): + typename = "uuid" + + +class Url(Metric): + typename = "url" + + +class Jwe(Metric): + typename = "jwe" + + def __init__(self, *args, **kwargs): + raise ValueError( + "JWE support was removed. " + "If you require this send an email to glean-team@mozilla.com." + ) + + +class CowString(str): + """ + Wrapper class for strings that should be represented + as a `Cow<'static, str>` in Rust, + or `String` in other target languages. + + This wraps `str`, so unless `CowString` is specifically + handled it acts (and serializes) + as a string. + """ + + def __init__(self, val: str): + self.inner: str = val + + def __eq__(self, other): + return self.inner == other.inner + + def __hash__(self): + return self.inner.__hash__() + + def __lt__(self, other): + return self.inner.__lt__(other.inner) + + +class Labeled(Metric): + labeled = True + + def __init__(self, *args, **kwargs): + labels = kwargs.pop("labels", None) + if labels is not None: + self.ordered_labels = labels + self.labels = set([CowString(label) for label in labels]) + else: + self.ordered_labels = None + self.labels = None + super().__init__(*args, **kwargs) + + def serialize(self) -> Dict[str, util.JSONType]: + """ + Serialize the metric back to JSON object model. + """ + d = super().serialize() + d["labels"] = self.ordered_labels + del d["ordered_labels"] + return d + + +class LabeledBoolean(Labeled, Boolean): + typename = "labeled_boolean" + + +class LabeledString(Labeled, String): + typename = "labeled_string" + + +class LabeledCounter(Labeled, Counter): + typename = "labeled_counter" + + +class Rate(Metric): + typename = "rate" + + def __init__(self, *args, **kwargs): + self.denominator_metric = kwargs.pop("denominator_metric", None) + super().__init__(*args, **kwargs) + + +class Denominator(Counter): + typename = "denominator" + # A denominator is a counter with an additional list of numerators. 
+ numerators: List[Rate] = [] + + +class Text(Metric): + typename = "text" + + +ObjectTree = Dict[str, Dict[str, Union[Metric, pings.Ping, tags.Tag]]] diff --git a/third_party/python/glean_parser/glean_parser/parser.py b/third_party/python/glean_parser/glean_parser/parser.py new file mode 100644 index 0000000000..5ca584ac1e --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/parser.py @@ -0,0 +1,446 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Code for parsing metrics.yaml files. +""" + +import functools +from pathlib import Path +import textwrap +from typing import Any, Dict, Generator, Iterable, Optional, Tuple, Union + +import jsonschema # type: ignore +from jsonschema.exceptions import ValidationError # type: ignore + +from .metrics import Metric, ObjectTree +from .pings import Ping, RESERVED_PING_NAMES +from .tags import Tag +from . import util +from .util import DictWrapper + + +ROOT_DIR = Path(__file__).parent +SCHEMAS_DIR = ROOT_DIR / "schemas" + +METRICS_ID = "moz://mozilla.org/schemas/glean/metrics/2-0-0" +PINGS_ID = "moz://mozilla.org/schemas/glean/pings/2-0-0" +TAGS_ID = "moz://mozilla.org/schemas/glean/tags/1-0-0" + + +def _update_validator(validator): + """ + Adds some custom validators to the jsonschema validator that produce + nicer error messages. + """ + + def required(validator, required, instance, schema): + if not validator.is_type(instance, "object"): + return + missing_properties = set( + property for property in required if property not in instance + ) + if len(missing_properties): + missing_properties = sorted(list(missing_properties)) + yield ValidationError( + f"Missing required properties: {', '.join(missing_properties)}" + ) + + validator.VALIDATORS["required"] = required + + +def _load_file( + filepath: Path, parser_config: Dict[str, Any] +) -> Generator[str, None, Tuple[Dict[str, util.JSONType], Optional[str]]]: + """ + Load a metrics.yaml or pings.yaml format file. + + If the `filepath` does not exist, raises `FileNotFoundError`, unless + `parser_config["allow_missing_files"]` is `True`. + """ + try: + content = util.load_yaml_or_json(filepath) + except FileNotFoundError: + if not parser_config.get("allow_missing_files", False): + raise + else: + return {}, None + except Exception as e: + yield util.format_error(filepath, "", textwrap.fill(str(e))) + return {}, None + + if content is None: + yield util.format_error(filepath, "", f"'{filepath}' file can not be empty.") + return {}, None + + if not isinstance(content, dict): + return {}, None + + if content == {}: + return {}, None + + schema_key = content.get("$schema") + if not isinstance(schema_key, str): + raise TypeError(f"Invalid schema key {schema_key}") + + filetype: Optional[str] = None + try: + filetype = schema_key.split("/")[-2] + except IndexError: + filetype = None + + if filetype not in ("metrics", "pings", "tags"): + filetype = None + + for error in validate(content, filepath): + content = {} + yield error + + return content, filetype + + +@functools.lru_cache(maxsize=1) +def _load_schemas() -> Dict[str, Tuple[Any, Any]]: + """ + Load all of the known schemas from disk, and put them in a map based on the + schema's $id. 
+ """ + schemas = {} + for schema_path in SCHEMAS_DIR.glob("*.yaml"): + schema = util.load_yaml_or_json(schema_path) + resolver = util.get_null_resolver(schema) + validator_class = jsonschema.validators.validator_for(schema) + _update_validator(validator_class) + validator_class.check_schema(schema) + validator = validator_class(schema, resolver=resolver) + schemas[schema["$id"]] = (schema, validator) + return schemas + + +def _get_schema( + schema_id: str, filepath: Union[str, Path] = "" +) -> Tuple[Any, Any]: + """ + Get the schema for the given schema $id. + """ + schemas = _load_schemas() + if schema_id not in schemas: + raise ValueError( + util.format_error( + filepath, + "", + f"$schema key must be one of {', '.join(schemas.keys())}", + ) + ) + return schemas[schema_id] + + +def _get_schema_for_content( + content: Dict[str, util.JSONType], filepath: Union[str, Path] +) -> Tuple[Any, Any]: + """ + Get the appropriate schema for the given JSON content. + """ + schema_url = content.get("$schema") + if not isinstance(schema_url, str): + raise TypeError("Invalid $schema type {schema_url}") + return _get_schema(schema_url, filepath) + + +def validate( + content: Dict[str, util.JSONType], filepath: Union[str, Path] = "" +) -> Generator[str, None, None]: + """ + Validate the given content against the appropriate schema. + """ + try: + schema, validator = _get_schema_for_content(content, filepath) + except ValueError as e: + yield str(e) + else: + yield from ( + util.format_error(filepath, "", util.pprint_validation_error(e)) + for e in validator.iter_errors(content) + ) + + +def _instantiate_metrics( + all_objects: ObjectTree, + sources: Dict[Any, Path], + content: Dict[str, util.JSONType], + filepath: Path, + config: Dict[str, Any], +) -> Generator[str, None, None]: + """ + Load a list of metrics.yaml files, convert the JSON information into Metric + objects, and merge them into a single tree. 
+ """ + global_no_lint = content.get("no_lint", []) + global_tags = content.get("$tags", []) + assert isinstance(global_tags, list) + + for category_key, category_val in sorted(content.items()): + if category_key.startswith("$"): + continue + if category_key == "no_lint": + continue + if not config.get("allow_reserved") and category_key.split(".")[0] == "glean": + yield util.format_error( + filepath, + f"For category '{category_key}'", + "Categories beginning with 'glean' are reserved for " + "Glean internal use.", + ) + continue + all_objects.setdefault(category_key, DictWrapper()) + + if not isinstance(category_val, dict): + raise TypeError(f"Invalid content for {category_key}") + + for metric_key, metric_val in sorted(category_val.items()): + try: + metric_obj = Metric.make_metric( + category_key, metric_key, metric_val, validated=True, config=config + ) + except Exception as e: + yield util.format_error( + filepath, + f"On instance {category_key}.{metric_key}", + str(e), + metric_val.defined_in["line"], + ) + metric_obj = None + else: + if ( + not config.get("allow_reserved") + and "all-pings" in metric_obj.send_in_pings + ): + yield util.format_error( + filepath, + f"On instance {category_key}.{metric_key}", + 'Only internal metrics may specify "all-pings" ' + 'in "send_in_pings"', + metric_val.defined_in["line"], + ) + metric_obj = None + + if metric_obj is not None: + metric_obj.no_lint = sorted(set(metric_obj.no_lint + global_no_lint)) + if len(global_tags): + metric_obj.metadata["tags"] = sorted( + set(metric_obj.metadata.get("tags", []) + global_tags) + ) + + if isinstance(filepath, Path): + metric_obj.defined_in["filepath"] = str(filepath) + + already_seen = sources.get((category_key, metric_key)) + if already_seen is not None: + # We've seen this metric name already + yield util.format_error( + filepath, + "", + ( + f"Duplicate metric name '{category_key}.{metric_key}' " + f"already defined in '{already_seen}'" + ), + metric_obj.defined_in["line"], + ) + else: + all_objects[category_key][metric_key] = metric_obj + sources[(category_key, metric_key)] = filepath + + +def _instantiate_pings( + all_objects: ObjectTree, + sources: Dict[Any, Path], + content: Dict[str, util.JSONType], + filepath: Path, + config: Dict[str, Any], +) -> Generator[str, None, None]: + """ + Load a list of pings.yaml files, convert the JSON information into Ping + objects. 
+ """ + global_no_lint = content.get("no_lint", []) + assert isinstance(global_no_lint, list) + + for ping_key, ping_val in sorted(content.items()): + if ping_key.startswith("$"): + continue + if ping_key == "no_lint": + continue + if not config.get("allow_reserved"): + if ping_key in RESERVED_PING_NAMES: + yield util.format_error( + filepath, + f"For ping '{ping_key}'", + f"Ping uses a reserved name ({RESERVED_PING_NAMES})", + ) + continue + if not isinstance(ping_val, dict): + raise TypeError(f"Invalid content for ping {ping_key}") + ping_val["name"] = ping_key + try: + ping_obj = Ping( + defined_in=getattr(ping_val, "defined_in", None), + _validated=True, + **ping_val, + ) + except Exception as e: + yield util.format_error(filepath, f"On instance '{ping_key}'", str(e)) + continue + + if ping_obj is not None: + ping_obj.no_lint = sorted(set(ping_obj.no_lint + global_no_lint)) + + if isinstance(filepath, Path) and ping_obj.defined_in is not None: + ping_obj.defined_in["filepath"] = str(filepath) + + already_seen = sources.get(ping_key) + if already_seen is not None: + # We've seen this ping name already + yield util.format_error( + filepath, + "", + f"Duplicate ping name '{ping_key}' " + f"already defined in '{already_seen}'", + ) + else: + all_objects.setdefault("pings", {})[ping_key] = ping_obj + sources[ping_key] = filepath + + +def _instantiate_tags( + all_objects: ObjectTree, + sources: Dict[Any, Path], + content: Dict[str, util.JSONType], + filepath: Path, + config: Dict[str, Any], +) -> Generator[str, None, None]: + """ + Load a list of tags.yaml files, convert the JSON information into Tag + objects. + """ + global_no_lint = content.get("no_lint", []) + assert isinstance(global_no_lint, list) + + for tag_key, tag_val in sorted(content.items()): + if tag_key.startswith("$"): + continue + if tag_key == "no_lint": + continue + if not isinstance(tag_val, dict): + raise TypeError(f"Invalid content for tag {tag_key}") + tag_val["name"] = tag_key + try: + tag_obj = Tag( + defined_in=getattr(tag_val, "defined_in", None), + _validated=True, + **tag_val, + ) + except Exception as e: + yield util.format_error(filepath, f"On instance '{tag_key}'", str(e)) + continue + + if tag_obj is not None: + tag_obj.no_lint = sorted(set(tag_obj.no_lint + global_no_lint)) + + if isinstance(filepath, Path) and tag_obj.defined_in is not None: + tag_obj.defined_in["filepath"] = str(filepath) + + already_seen = sources.get(tag_key) + if already_seen is not None: + # We've seen this tag name already + yield util.format_error( + filepath, + "", + f"Duplicate tag name '{tag_key}' " + f"already defined in '{already_seen}'", + ) + else: + all_objects.setdefault("tags", {})[tag_key] = tag_obj + sources[tag_key] = filepath + + +def _preprocess_objects(objs: ObjectTree, config: Dict[str, Any]) -> ObjectTree: + """ + Preprocess the object tree to better set defaults. 
+ """ + for category in objs.values(): + for obj in category.values(): + if not isinstance(obj, Metric): + continue + + if not config.get("do_not_disable_expired", False) and hasattr( + obj, "is_disabled" + ): + obj.disabled = obj.is_disabled() + + if hasattr(obj, "send_in_pings"): + if "default" in obj.send_in_pings: + obj.send_in_pings = obj.default_store_names + [ + x for x in obj.send_in_pings if x != "default" + ] + obj.send_in_pings = sorted(list(set(obj.send_in_pings))) + return objs + + +@util.keep_value +def parse_objects( + filepaths: Iterable[Path], config: Optional[Dict[str, Any]] = None +) -> Generator[str, None, ObjectTree]: + """ + Parse one or more metrics.yaml and/or pings.yaml files, returning a tree of + `metrics.Metric`, `pings.Ping`, and `tags.Tag` instances. + + The result is a generator over any errors. If there are no errors, the + actual metrics can be obtained from `result.value`. For example:: + + result = metrics.parse_metrics(filepaths) + for err in result: + print(err) + all_metrics = result.value + + The result value is a dictionary of category names to categories, where + each category is a dictionary from metric name to `metrics.Metric` + instances. There are also the special categories `pings` and `tags` + containing all of the `pings.Ping` and `tags.Tag` instances, respectively. + + :param filepaths: list of Path objects to metrics.yaml, pings.yaml, and/or + tags.yaml files + :param config: A dictionary of options that change parsing behavior. + Supported keys are: + + - `allow_reserved`: Allow values reserved for internal Glean use. + - `do_not_disable_expired`: Don't mark expired metrics as disabled. + This is useful when you want to retain the original "disabled" + value from the `metrics.yaml`, rather than having it overridden when + the metric expires. + - `allow_missing_files`: Do not raise a `FileNotFoundError` if any of + the input `filepaths` do not exist. + """ + if config is None: + config = {} + + all_objects: ObjectTree = DictWrapper() + sources: Dict[Any, Path] = {} + filepaths = util.ensure_list(filepaths) + for filepath in filepaths: + content, filetype = yield from _load_file(filepath, config) + if filetype == "metrics": + yield from _instantiate_metrics( + all_objects, sources, content, filepath, config + ) + elif filetype == "pings": + yield from _instantiate_pings( + all_objects, sources, content, filepath, config + ) + elif filetype == "tags": + yield from _instantiate_tags( + all_objects, sources, content, filepath, config + ) + return _preprocess_objects(all_objects, config) diff --git a/third_party/python/glean_parser/glean_parser/pings.py b/third_party/python/glean_parser/glean_parser/pings.py new file mode 100644 index 0000000000..cb5f2487b9 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/pings.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Classes for managing the description of pings. +""" + +from typing import Dict, List, Optional + + +from . 
import util + + +RESERVED_PING_NAMES = ["baseline", "metrics", "events", "deletion-request", "default"] + + +class Ping: + def __init__( + self, + name: str, + description: str, + bugs: List[str], + notification_emails: List[str], + metadata: Optional[Dict] = None, + data_reviews: Optional[List[str]] = None, + include_client_id: bool = False, + send_if_empty: bool = False, + reasons: Optional[Dict[str, str]] = None, + defined_in: Optional[Dict] = None, + no_lint: Optional[List[str]] = None, + _validated: bool = False, + ): + # Avoid cyclical import + from . import parser + + self.name = name + self.description = description + + self.bugs = bugs + self.notification_emails = notification_emails + if metadata is None: + metadata = {} + self.metadata = metadata + if data_reviews is None: + data_reviews = [] + self.data_reviews = data_reviews + self.include_client_id = include_client_id + self.send_if_empty = send_if_empty + if reasons is None: + reasons = {} + self.reasons = reasons + self.defined_in = defined_in + if no_lint is None: + no_lint = [] + self.no_lint = no_lint + + # _validated indicates whether this ping has already been jsonschema + # validated (but not any of the Python-level validation). + if not _validated: + data: Dict[str, util.JSONType] = { + "$schema": parser.PINGS_ID, + self.name: self._serialize_input(), + } + for error in parser.validate(data): + raise ValueError(error) + + _generate_enums = [("reason_codes", "ReasonCodes")] + + @property + def type(self) -> str: + return "ping" + + @property + def reason_codes(self) -> List[str]: + return sorted(list(self.reasons.keys())) + + def serialize(self) -> Dict[str, util.JSONType]: + """ + Serialize the metric back to JSON object model. + """ + d = self.__dict__.copy() + del d["name"] + return d + + def _serialize_input(self) -> Dict[str, util.JSONType]: + d = self.serialize() + modified_dict = util.remove_output_params(d, "defined_in") + return modified_dict + + def identifier(self) -> str: + """ + Used for the "generated from ..." comment in the output. + """ + return self.name diff --git a/third_party/python/glean_parser/glean_parser/rust.py b/third_party/python/glean_parser/glean_parser/rust.py new file mode 100644 index 0000000000..eb3355e382 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/rust.py @@ -0,0 +1,218 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Outputter to generate Rust code for metrics. +""" + +import enum +import json +from pathlib import Path +from typing import Any, Dict, Optional, Union + +from . import __version__ +from . import metrics +from . import pings +from . import tags +from . import util + + +def rust_datatypes_filter(value): + """ + A Jinja2 filter that renders Rust literals. 
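A hedged sketch of the strings this filter produces for a few representative inputs, assuming the module is importable as `glean_parser.rust`; the override rules it implements are listed immediately below:

```python
from glean_parser import metrics
from glean_parser.rust import rust_datatypes_filter

# Lists render as vec![...], with strings getting .into().
assert (
    rust_datatypes_filter(["metrics", "events"])
    == 'vec!["metrics".into(), "events".into()]'
)

# Enum members render as EnumName::CamelCasedVariant.
assert rust_datatypes_filter(metrics.Lifetime.ping) == "Lifetime::Ping"

# None renders as Rust's None.
assert rust_datatypes_filter(None) == "None"
```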
+ + Based on Python's JSONEncoder, but overrides: + - dicts to raise an error + - sets to vec![] (used in labels) + - enums to become Class::Value + - lists to vec![] (used in send_in_pings) + - null to None + - strings to "value".into() + - Rate objects to a CommonMetricData initializer + (for external Denominators' Numerators lists) + """ + + class RustEncoder(json.JSONEncoder): + def iterencode(self, value): + if isinstance(value, dict): + raise ValueError("RustEncoder doesn't know dicts {}".format(str(value))) + elif isinstance(value, enum.Enum): + yield (value.__class__.__name__ + "::" + util.Camelize(value.name)) + elif isinstance(value, set): + yield "vec![" + first = True + for subvalue in sorted(list(value)): + if not first: + yield ", " + yield from self.iterencode(subvalue) + first = False + yield "]" + elif isinstance(value, list): + yield "vec![" + first = True + for subvalue in list(value): + if not first: + yield ", " + yield from self.iterencode(subvalue) + first = False + yield "]" + elif value is None: + yield "None" + # `CowString` is a `str`, so needs to be before next case + elif isinstance(value, metrics.CowString): + yield f'::std::borrow::Cow::from("{value.inner}")' + elif isinstance(value, str): + yield f'"{value}".into()' + elif isinstance(value, metrics.Rate): + yield "CommonMetricData(" + first = True + for arg_name in util.common_metric_args: + if hasattr(value, arg_name): + if not first: + yield ", " + yield f"{util.camelize(arg_name)} = " + yield from self.iterencode(getattr(value, arg_name)) + first = False + yield ")" + else: + yield from super().iterencode(value) + + return "".join(RustEncoder().iterencode(value)) + + +def ctor(obj): + """ + Returns the scope and name of the constructor to use for a metric object. + Necessary because LabeledMetric<T> is constructed using LabeledMetric::new, + not LabeledMetric<T>::new + """ + if getattr(obj, "labeled", False): + return "LabeledMetric::new" + return class_name(obj.type) + "::new" + + +def type_name(obj): + """ + Returns the Rust type to use for a given metric or ping object. + """ + + if getattr(obj, "labeled", False): + return "LabeledMetric<{}>".format(class_name(obj.type)) + generate_enums = getattr(obj, "_generate_enums", []) # Extra Keys? Reasons? + if len(generate_enums): + generic = None + for name, suffix in generate_enums: + if len(getattr(obj, name)): + generic = util.Camelize(obj.name) + suffix + else: + if isinstance(obj, metrics.Event): + generic = "NoExtra" + else: + generic = "No" + suffix + + return "{}<{}>".format(class_name(obj.type), generic) + + return class_name(obj.type) + + +def extra_type_name(typ: str) -> str: + """ + Returns the corresponding Rust type for an event's extra key types. + """ + + if typ == "boolean": + return "bool" + elif typ == "string": + return "String" + elif typ == "quantity": + return "u32" + else: + return "UNSUPPORTED" + + +def class_name(obj_type): + """ + Returns the Rust class name for a given metric or ping type. + """ + if obj_type == "ping": + return "Ping" + if obj_type.startswith("labeled_"): + obj_type = obj_type[8:] + return util.Camelize(obj_type) + "Metric" + + +def extra_keys(allowed_extra_keys): + """ + Returns the &'static [&'static str] ALLOWED_EXTRA_KEYS for impl ExtraKeys + """ + return "&[" + ", ".join([f'"{key}"' for key in allowed_extra_keys]) + "]" + + +class Category: + """ + Data struct holding information about a metric to be used in the template. 
+ """ + + def __init__( + self, + name: str, + objs: Dict[str, Union[metrics.Metric, pings.Ping, tags.Tag]], + contains_pings: bool, + ): + self.name = name + self.objs = objs + self.contains_pings = contains_pings + + +def output_rust( + objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]] = None +) -> None: + """ + Given a tree of objects, output Rust code to `output_dir`. + + :param objs: A tree of objects (metrics and pings) as returned from + `parser.parse_objects`. + :param output_dir: Path to an output directory to write to. + :param options: options dictionary, not currently used for Rust + """ + + if options is None: + options = {} + + template = util.get_jinja2_template( + "rust.jinja2", + filters=( + ("rust", rust_datatypes_filter), + ("snake_case", util.snake_case), + ("camelize", util.camelize), + ("type_name", type_name), + ("extra_type_name", extra_type_name), + ("ctor", ctor), + ("extra_keys", extra_keys), + ), + ) + + filename = "glean_metrics.rs" + filepath = output_dir / filename + categories = [] + + for category_key, category_val in objs.items(): + contains_pings = any( + isinstance(obj, pings.Ping) for obj in category_val.values() + ) + + cat = Category(category_key, category_val, contains_pings) + categories.append(cat) + + with filepath.open("w", encoding="utf-8") as fd: + fd.write( + template.render( + parser_version=__version__, + categories=categories, + extra_metric_args=util.extra_metric_args, + common_metric_args=util.common_metric_args, + ) + ) diff --git a/third_party/python/glean_parser/glean_parser/schemas/metrics.1-0-0.schema.yaml b/third_party/python/glean_parser/glean_parser/schemas/metrics.1-0-0.schema.yaml new file mode 100644 index 0000000000..047124b771 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/schemas/metrics.1-0-0.schema.yaml @@ -0,0 +1,605 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +--- +$schema: http://json-schema.org/draft-07/schema# +title: Metrics +description: | + Schema for the metrics.yaml files for Mozilla's Glean telemetry SDK. + + The top-level of the `metrics.yaml` file has a key defining each category of + metrics. Categories must be snake_case, and they may also have dots `.` to + define subcategories. + +$id: moz://mozilla.org/schemas/glean/metrics/1-0-0 + +definitions: + token: + type: string + pattern: "^[A-Za-z_][A-Za-z0-9_\\.]*$" + + snake_case: + type: string + pattern: "^[a-z_][a-z0-9_]*$" + + dotted_snake_case: + type: string + pattern: "^[a-z_][a-z0-9_]{0,29}(\\.[a-z_][a-z0-9_]{0,29})*$" + maxLength: 40 + + kebab_case: + type: string + # Bug 1601270; we allow 3 specific existing snake_cased ping names for now, + # but these special cases can be removed once the number of legacy clients + # sufficiently dwindles, likely in 2020H2. + pattern: "^[a-z][a-z0-9-]{0,29}$\ + |^deletion_request$|^bookmarks_sync$|^history_sync$|^session_end$|^all_pings$|^glean_.*$" + + long_id: + allOf: + - $ref: "#/definitions/snake_case" + - maxLength: 40 + + short_id: + allOf: + - $ref: "#/definitions/snake_case" + - maxLength: 30 + + labeled_metric_id: + type: string + pattern: "^[a-z_][a-z0-9_-]{0,29}(\\.[a-z_][a-z0-9_-]{0,29})*$" + maxLength: 71 # Note: this should be category + metric + 1 + + metric: + description: | + Describes a single metric. 
+ + See https://mozilla.github.io/glean_parser/metrics-yaml.html + + type: object + + additionalProperties: false + + properties: + type: + title: Metric type + description: | + **Required.** + + Specifies the type of a metric, like "counter" or "event". This + defines which operations are valid for the metric, how it is stored + and how data analysis tooling displays it. + + The supported types are: + - `event`: Record a specific event (with optional metadata). + Additional properties: `extra_keys`. + + - `boolean`: A metric storing values of true or false. + + - `string`: A metric storing Unicode string values. + + - `string_list`: a list of Unicode strings. + + - `counter`: A numeric value that can only be incremented. + + - `quantity`: A numeric value that is set directly. + + - `timespan`: Represents a time interval. Additional properties: + `time_unit`. + + - `timing_distribution`: Record the distribution of multiple + timings. Additional properties: `time_unit`. + + - `datetime`: A date/time value. Represented as an ISO datetime in + UTC. Additional properties: `time_unit`. + + - `uuid`: Record a UUID v4. + + - `jwe`: Record a [JWE](https://tools.ietf.org/html/rfc7516) value. + + - `memory_distribution`: A histogram for recording memory usage + values. Additional properties: `memory_unit`. + + - `custom_distribution`: A histogram with a custom range and number + of buckets. This metric type is for legacy support only and is + only allowed for metrics coming from GeckoView. Additional + properties: `range_min`, `range_max`, `bucket_count`, + `histogram_type`. + + - `rate`: Used to record the rate something happens relative to some + other thing. For example, the number of HTTP connections that + experience an error relative to the number of total HTTP + connections made. + + - Additionally, labeled versions of many metric types are supported. + These support the `labels`_ parameter, allowing multiple instances + of the metric to be stored at a given set of labels. The labeled + metric types include: + + `labeled_boolean`, `labeled_string`, `labeled_counter`. + + type: string + enum: + - event + - boolean + - string + - string_list + - counter + - quantity + - timespan + - timing_distribution + - custom_distribution + - memory_distribution + - datetime + - uuid + - jwe + - labeled_boolean + - labeled_string + - labeled_counter + + description: + title: Description + description: | + **Required.** + + A textual description of what this metric does, what it means, and its + edge cases or any other helpful information. + + Descriptions may contain [markdown + syntax](https://www.markdownguide.org/basic-syntax/). + type: string + + lifetime: + title: Lifetime + description: | + Defines the lifetime of the metric. It must be one of the following + values: + + - `ping` (default): The metric is reset each time it is sent in a + ping. + + - `user`: The metric contains a property that is part of the user's + profile and is never reset. + + - `application`: The metric contains a property that is related to the + application, and is reset only at application restarts. + enum: + - ping + - user + - application + default: ping + + send_in_pings: + title: Send in pings + description: | + Which pings the metric should be sent on. If not specified, the metric + is sent on the "default ping", which is the `events` ping for events, + and the `metrics` ping for everything else. Most metrics don't need to + specify this. 
+ + (There is an additional special value of `all-pings` for internal + Glean metrics only that is used to indicate that a metric may appear + in any ping.) + type: array + items: + $ref: "#/definitions/kebab_case" + default: + - default + + notification_emails: + title: Notification emails + description: | + **Required.** + + A list of email addresses to notify for important events with the + metric or when people with context or ownership for the metric need to + be contacted. + type: array + minItems: 1 + items: + type: string + format: email + + bugs: + title: Related bugs + description: | + **Required.** + + A list of bug URLs (e.g. Bugzilla and Github) that are relevant to + this metric, e.g., tracking its original implementation or later + changes to it. + + Using bug numbers alone is deprecated and will be an error in the + future. Each entry should be a full URL to the bug in its tracker. + type: array + minItems: 1 + items: + anyOf: + - type: integer # Keep supporting integer for backward-compat + - type: string + format: uri + + data_reviews: + title: Review references + description: | + **Required.** + + A list of URIs to any data collection reviews relevant to the metric. + type: array + items: + type: string + format: uri + + disabled: + title: Disabled + description: | + If `true`, the metric is disabled, and any metric collection on it + will be silently ignored at runtime. + type: boolean + default: false + + expires: + title: Expires + description: | + **Required.** + + By default it may be one of the following values: + - ``: An ISO date `yyyy-mm-dd` in UTC on which the + metric expires. For example, `2019-03-13`. This date is checked at + build time. Except in special cases, this form should be used so + that the metric automatically "sunsets" after a period of time. + - `never`: This metric never expires. + - `expired`: This metric is manually expired. + + The default may be overriden in certain applications by the + `custom_validate_expires` and `custom_is_expired` configs. + type: string + + version: + title: Metric version + description: | + The version of the metric. A monotonically increasing value. If not + provided, defaults to 0. + + time_unit: + title: Time unit + description: | + For timespans and datetimes, specifies the unit that the metric will + be stored and displayed in. If not provided, it defaults to + "millisecond". Time values are sent to the backend as integers, so + `time_unit`_ determines the maximum resolution at which timespans are + recorded. Times are always truncated, not rounded, to the nearest time + unit. For example, a measurement of 25 ns will be returned as 0 ms if + `time_unit` is `"millisecond"`. + + For timing distributions, times are always recorded and sent in + nanoseconds, but `time_unit` controls the minimum and maximum values. + If not provided, it defaults to "nanosecond". + + - nanosecond: 1ns <= x <= 10 minutes + - microsecond: 1μs <= x <= ~6.94 days + - millisecond: 1ms <= x <= ~19 years + + Valid when `type`_ is `timespan`, `timing_distribution` or `datetime`. + enum: + - nanosecond + - microsecond + - millisecond + - second + - minute + - hour + - day + + memory_unit: + title: Memory unit + description: | + The unit that the incoming memory size values are recorded in. + + The units are the power-of-2 units, so "kilobyte" is correctly a + "kibibyte". 
+ + - kilobyte == 2^10 == 1,024 bytes + - megabyte == 2^20 == 1,048,576 bytes + - gigabyte == 2^30 == 1,073,741,824 bytes + + Values are automatically converted to and transmitted as bytes. + + Valid when `type`_ is `memory_distribution`. + enum: + - byte + - kilobyte + - megabyte + - gigabyte + + labels: + title: Labels + description: | + A list of labels for a labeled metric. If provided, the labels are + enforced at run time, and recording to an unknown label is recorded + to the special label `__other__`. If not provided, the labels + may be anything, but using too many unique labels will put some + labels in the special label `__other__`. + + Valid with any of the labeled metric types. + anyOf: + - type: array + uniqueItems: true + items: + $ref: "#/definitions/labeled_metric_id" + maxItems: 16 + - type: "null" + + extra_keys: + title: Extra keys + description: | + The acceptable keys on the "extra" object sent with events. This is an + object mapping the key to an object containing metadata about the key. + A maximum of 10 extra keys is allowed. + This metadata object has the following keys: + + - `description`: **Required.** A description of the key. + + Valid when `type`_ is `event`. + type: object + propertyNames: + $ref: "#/definitions/dotted_snake_case" + additionalProperties: + type: object + properties: + description: + type: string + required: + - description + maxProperties: 10 + default: {} + + gecko_datapoint: + title: Gecko Datapoint + description: | + This is a Gecko-specific property. It is the name of the Gecko metric + to accumulate the data from, when using the Glean SDK in a product + using GeckoView. See bug 1566356 for more context. + + type: string + + range_min: + title: Range minimum + description: | + The minimum value of a custom distribution. + + Valid when `type`_ is `custom_distribution`. + type: number + default: 1 + + range_max: + title: Range maximum + description: | + The maximum value of a custom distribution. + + Required when `type`_ is `custom_distribution`. + type: number + + bucket_count: + title: Bucket count + description: | + The number of buckets to include in a custom distribution. + + Required when `type`_ is `custom_distribution`. + type: number + minimum: 1 + maximum: 100 + + histogram_type: + title: Histogram type + description: | + The type of histogram bucketing to use: + - `linear`: The buckets are linearly spaced within the range. + - `exponential`: The buckets use the natural logarithmic so the + smaller-valued buckets are smaller in size than the higher-valued + buckets. + + Required when `type`_ is `custom_distribution`. + enum: + - linear + - exponential + + unit: + title: Unit + description: | + The unit of the metric, for metrics that don't already require a + meaningful unit, such as `time_unit`. + This is provided for informational purposes only and doesn't have any + effect on data collection. + type: string + + no_lint: + title: Lint checks to skip + description: | + This parameter lists any lint checks to skip for this metric only. + type: array + items: + type: string + + decrypted_name: + title: Decrypted name + description: | + Name of the column where to persist the decrypted value + stored in the JWE after processing. + + Required when `type`_ is `jwe`. 
+ type: string + pattern: "^[a-z_][a-z0-9_]{0,29}(\\.[a-z_][a-z0-9_]{0,29})*$" + + data_sensitivity: + title: The level of data sensitivity + description: | + There are four data collection categories related to data sensitivity + [defined here](https://wiki.mozilla.org/Firefox/Data_Collection): + + - **Category 1: Technical Data:** (`technical`) Information about the + machine or Firefox itself. Examples include OS, available memory, + crashes and errors, outcome of automated processes like updates, + safebrowsing, activation, version \#s, and buildid. This also + includes compatibility information about features and APIs used by + websites, addons, and other 3rd-party software that interact with + Firefox during usage. + + - **Category 2: Interaction Data:** (`interaction`) Information about + the user’s direct engagement with Firefox. Examples include how many + tabs, addons, or windows a user has open; uses of specific Firefox + features; session length, scrolls and clicks; and the status of + discrete user preferences. + + - **Category 3: Web activity data:** (`web_activity`) Information + about user web browsing that could be considered sensitive. Examples + include users’ specific web browsing history; general information + about their web browsing history (such as TLDs or categories of + webpages visited over time); and potentially certain types of + interaction data about specific webpages visited. + + - **Category 4: Highly sensitive data:** (`highly_sensitive`) + Information that directly identifies a person, or if combined with + other data could identify a person. Examples include e-mail, + usernames, identifiers such as google ad id, apple id, fxaccount, + city or country (unless small ones are explicitly filtered out), or + certain cookies. It may be embedded within specific website content, + such as memory contents, dumps, captures of screen data, or DOM + data. + type: array + items: + enum: + - technical + - interaction + - web_activity + - highly_sensitive + type: string + minLength: 1 + uniqueItems: true + + required: + - type + - bugs + - description + - notification_emails + - data_reviews + - expires + +type: object + +propertyNames: + anyOf: + - allOf: + - $ref: "#/definitions/dotted_snake_case" + - not: + description: "'pings' is reserved as a category name." + const: pings + - enum: ['$schema'] + +properties: + $schema: + type: string + format: url + + no_lint: + title: Lint checks to skip globally + description: | + This parameter lists any lint checks to skip for this whole file. + type: array + items: + type: string + +additionalProperties: + type: object + propertyNames: + anyOf: + - $ref: "#/definitions/short_id" + additionalProperties: + allOf: + - $ref: "#/definitions/metric" + - + if: + properties: + type: + const: event + then: + properties: + lifetime: + description: | + Event metrics must have ping lifetime. + const: ping + - if: + not: + properties: + type: + enum: + - timing_distribution + - custom_distribution + - memory_distribution + - quantity + - boolean + - string + - labeled_counter + then: + properties: + gecko_datapoint: + description: | + `gecko_datapoint` is only allowed for `timing_distribution`, + `custom_distribution`, `memory_distribution`, `quantity`, + `boolean`, `string` and `labeled_counter`. + maxLength: 0 + - + if: + properties: + type: + enum: + - custom_distribution + then: + required: + - gecko_datapoint + description: | + `custom_distribution` is only allowed for Gecko + metrics. 
+ - + if: + properties: + type: + const: custom_distribution + then: + required: + - range_max + - bucket_count + - histogram_type + description: | + `custom_distribution` is missing required parameters `range_max`, + `bucket_count` and `histogram_type`. + - + if: + properties: + type: + const: memory_distribution + then: + required: + - memory_unit + description: | + `memory_distribution` is missing required parameter `memory_unit`. + - + if: + properties: + type: + const: quantity + then: + required: + - unit + description: | + `quantity` is missing required parameter `unit`. + - + if: + properties: + type: + const: jwe + then: + required: + - decrypted_name + description: | + `jwe` is missing required parameter `decrypted_name`. diff --git a/third_party/python/glean_parser/glean_parser/schemas/metrics.2-0-0.schema.yaml b/third_party/python/glean_parser/glean_parser/schemas/metrics.2-0-0.schema.yaml new file mode 100644 index 0000000000..ff99f328c9 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/schemas/metrics.2-0-0.schema.yaml @@ -0,0 +1,735 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +--- +$schema: http://json-schema.org/draft-07/schema# +title: Metrics +description: | + Schema for the metrics.yaml files for Mozilla's Glean telemetry SDK. + + The top-level of the `metrics.yaml` file has a key defining each category of + metrics. Categories must be snake_case, and they may also have dots `.` to + define subcategories. + +$id: moz://mozilla.org/schemas/glean/metrics/2-0-0 + +definitions: + token: + type: string + pattern: "^[A-Za-z_][A-Za-z0-9_\\.]*$" + + snake_case: + type: string + pattern: "^[a-z_][a-z0-9_]*$" + + dotted_snake_case: + type: string + pattern: "^[a-z_][a-z0-9_]{0,29}(\\.[a-z_][a-z0-9_]{0,29})*$" + maxLength: 40 + + # Prior to version 2.0.0 of the schema, special ping names with underscores + # were also supported. + kebab_case: + type: string + pattern: "^[a-z][a-z0-9-]{0,29}$" + + long_id: + allOf: + - $ref: "#/definitions/snake_case" + - maxLength: 40 + + short_id: + allOf: + - $ref: "#/definitions/snake_case" + - maxLength: 30 + + labeled_metric_id: + type: string + pattern: "^[ -~]+$" + maxLength: 71 # Note: this should be category + metric + 1 + + metric: + description: | + Describes a single metric. + + See https://mozilla.github.io/glean_parser/metrics-yaml.html + + type: object + + additionalProperties: false + + properties: + type: + title: Metric type + description: | + **Required.** + + Specifies the type of a metric, like "counter" or "event". This + defines which operations are valid for the metric, how it is stored + and how data analysis tooling displays it. + + The supported types are: + - `event`: Record a specific event (with optional metadata). + Additional properties: `extra_keys`. + + - `boolean`: A metric storing values of true or false. + + - `string`: A metric storing Unicode string values. + + - `string_list`: a list of Unicode strings. + + - `counter`: A numeric value that can only be incremented. + + - `quantity`: A numeric value that is set directly. + + - `timespan`: Represents a time interval. Additional properties: + `time_unit`. + + - `timing_distribution`: Record the distribution of multiple + timings. Additional properties: `time_unit`. + + - `datetime`: A date/time value. Represented as an ISO datetime in + UTC. Additional properties: `time_unit`. 
+ + - `uuid`: Record a UUID v4. + + - `url`: Record a valid URL string. + + - `memory_distribution`: A histogram for recording memory usage + values. Additional properties: `memory_unit`. + + - `custom_distribution`: A histogram with a custom range and number + of buckets. This metric type is for legacy support only and is + only allowed for metrics coming from GeckoView. Additional + properties: `range_min`, `range_max`, `bucket_count`, + `histogram_type`. + + - `rate`: Used to record the rate something happens relative to some + other thing. For example, the number of HTTP connections that + experience an error relative to the number of total HTTP + connections made. + + - Additionally, labeled versions of many metric types are supported. + These support the `labels`_ parameter, allowing multiple instances + of the metric to be stored at a given set of labels. The labeled + metric types include: + + `labeled_boolean`, `labeled_string`, `labeled_counter`. + + - `text`: Record long text data. + + type: string + enum: + - event + - boolean + - string + - string_list + - counter + - quantity + - timespan + - timing_distribution + - custom_distribution + - memory_distribution + - datetime + - uuid + - url + - jwe + - labeled_boolean + - labeled_string + - labeled_counter + - rate + - text + + description: + title: Description + description: | + **Required.** + + A textual description of what this metric does, what it means, and its + edge cases or any other helpful information. + + Descriptions may contain [markdown + syntax](https://www.markdownguide.org/basic-syntax/). + type: string + + metadata: + title: Metadata + description: | + Additional metadata about this metric. Currently limited to a list of + tags. + type: object + properties: + tags: + title: Tags + description: Which tags are specified for this metric. + type: array + items: + type: string + maxLength: 80 + default: {} + + lifetime: + title: Lifetime + description: | + Defines the lifetime of the metric. It must be one of the following + values: + + - `ping` (default): The metric is reset each time it is sent in a + ping. + + - `user`: The metric contains a property that is part of the user's + profile and is never reset. + + - `application`: The metric contains a property that is related to the + application, and is reset only at application restarts. + enum: + - ping + - user + - application + default: ping + + send_in_pings: + title: Send in pings + description: | + Which pings the metric should be sent on. If not specified, the metric + is sent on the "default ping", which is the `events` ping for events, + and the `metrics` ping for everything else. Most metrics don't need to + specify this. + + (There is an additional special value of `all-pings` for internal + Glean metrics only that is used to indicate that a metric may appear + in any ping.) + type: array + items: + anyOf: + - $ref: "#/definitions/kebab_case" + # Allow "special" ping names that start with "glean_" used + # internally by the Glean SDK + - type: string + pattern: "^glean_.*$" + default: + - default + + notification_emails: + title: Notification emails + description: | + **Required.** + + A list of email addresses to notify for important events with the + metric or when people with context or ownership for the metric need to + be contacted. + type: array + minItems: 1 + items: + type: string + format: email + + bugs: + title: Related bugs + description: | + **Required.** + + A list of bug URLs (e.g. 
Bugzilla and Github) that are relevant to + this metric, e.g., tracking its original implementation or later + changes to it. + + Prior to version 2.0.0 of the schema, bugs could also be integers. + type: array + minItems: 1 + items: + type: string + format: uri + + data_reviews: + title: Review references + description: | + **Required.** + + A list of URIs to any data collection reviews relevant to the metric. + type: array + items: + type: string + format: uri + + disabled: + title: Disabled + description: | + If `true`, the metric is disabled, and any metric collection on it + will be silently ignored at runtime. + type: boolean + default: false + + expires: + title: Expires + description: | + **Required.** + + By default it may be one of the following values: + - `<date>`: An ISO date `yyyy-mm-dd` in UTC on which the + metric expires. For example, `2019-03-13`. This date is checked at + build time. Except in special cases, this form should be used so + that the metric automatically "sunsets" after a period of time. + - `<major version>`: An integer greater than 0 representing the + major version the metric expires in. For example, `11`. The + version is checked at build time against the major version provided to + glean_parser and is only valid if a major version is provided at + build time. If no major version is provided at build time and + expiration by major version is used for a metric, an error is + raised. + Note that mixing expiration by date and version is not allowed + within a product. + - `never`: This metric never expires. + - `expired`: This metric is manually expired. + + The default may be overridden in certain applications by the + `custom_validate_expires` and `custom_is_expired` configs. + oneOf: + - type: string + - type: integer + minimum: 1 + + version: + title: Metric version + description: | + The version of the metric. A monotonically increasing value. If not + provided, defaults to 0. + + time_unit: + title: Time unit + description: | + For timespans and datetimes, specifies the unit that the metric will + be stored and displayed in. If not provided, it defaults to + "millisecond". Time values are sent to the backend as integers, so + `time_unit`_ determines the maximum resolution at which timespans are + recorded. Times are always truncated, not rounded, to the nearest time + unit. For example, a measurement of 25 ns will be returned as 0 ms if + `time_unit` is `"millisecond"`. + + For timing distributions, times are always recorded and sent in + nanoseconds, but `time_unit` controls the minimum and maximum values. + If not provided, it defaults to "nanosecond". + + - nanosecond: 1ns <= x <= 10 minutes + - microsecond: 1μs <= x <= ~6.94 days + - millisecond: 1ms <= x <= ~19 years + + Valid when `type`_ is `timespan`, `timing_distribution` or `datetime`. + enum: + - nanosecond + - microsecond + - millisecond + - second + - minute + - hour + - day + + memory_unit: + title: Memory unit + description: | + The unit that the incoming memory size values are recorded in. + + The units are the power-of-2 units, so "kilobyte" is correctly a + "kibibyte". + + - kilobyte == 2^10 == 1,024 bytes + - megabyte == 2^20 == 1,048,576 bytes + - gigabyte == 2^30 == 1,073,741,824 bytes + + Values are automatically converted to and transmitted as bytes. + + Valid when `type`_ is `memory_distribution`. + enum: + - byte + - kilobyte + - megabyte + - gigabyte + + labels: + title: Labels + description: | + A list of labels for a labeled metric. 
If provided, the labels are + enforced at run time, and recording to an unknown label is recorded + to the special label `__other__`. If not provided, the labels + may be anything, but using too many unique labels will put some + labels in the special label `__other__`. + + Valid with any of the labeled metric types. + anyOf: + - type: array + uniqueItems: true + items: + $ref: "#/definitions/labeled_metric_id" + maxItems: 4096 + - type: "null" + + extra_keys: + title: Extra keys + description: | + The acceptable keys on the "extra" object sent with events. This is an + object mapping the key to an object containing metadata about the key. + A maximum of 15 extra keys is allowed. + This metadata object has the following keys: + + - `description`: **Required.** A description of the key. + + Valid when `type`_ is `event`. + type: object + propertyNames: + $ref: "#/definitions/dotted_snake_case" + additionalProperties: + type: object + properties: + description: + type: string + type: + type: string + enum: + - string + - boolean + - quantity + required: + - description + maxProperties: 15 + default: {} + + gecko_datapoint: + title: Gecko Datapoint + description: | + This is a Gecko-specific property. It is the name of the Gecko metric + to accumulate the data from, when using the Glean SDK in a product + using GeckoView. See bug 1566356 for more context. + + type: string + + range_min: + title: Range minimum + description: | + The minimum value of a custom distribution. + + Valid when `type`_ is `custom_distribution`. + type: number + default: 1 + + range_max: + title: Range maximum + description: | + The maximum value of a custom distribution. + + Required when `type`_ is `custom_distribution`. + type: number + + bucket_count: + title: Bucket count + description: | + The number of buckets to include in a custom distribution. + + Required when `type`_ is `custom_distribution`. + type: number + minimum: 1 + maximum: 100 + + histogram_type: + title: Histogram type + description: | + The type of histogram bucketing to use: + - `linear`: The buckets are linearly spaced within the range. + - `exponential`: The buckets use the natural logarithmic so the + smaller-valued buckets are smaller in size than the higher-valued + buckets. + + Required when `type`_ is `custom_distribution`. + enum: + - linear + - exponential + + unit: + title: Unit + description: | + The unit of the metric. + This is only required for metrics + that don't already require a meaningful unit, e.g. `quantity` + This is provided for informational purposes only and doesn't have any + effect on data collection. + + Metric types like `timespan`, `datetime` + and `timing_distribution` take a `time_unit` instead. + type: string + + no_lint: + title: Lint checks to skip + description: | + This parameter lists any lint checks to skip for this metric only. + type: array + items: + type: string + + data_sensitivity: + title: The level of data sensitivity + description: | + There are four data collection categories related to data sensitivity + [defined here](https://wiki.mozilla.org/Firefox/Data_Collection): + + - **Category 1: Technical Data:** (`technical`) Information about the + machine or Firefox itself. Examples include OS, available memory, + crashes and errors, outcome of automated processes like updates, + safebrowsing, activation, version \#s, and buildid. This also + includes compatibility information about features and APIs used by + websites, addons, and other 3rd-party software that interact with + Firefox during usage. 
+ + - **Category 2: Interaction Data:** (`interaction`) Information about + the user’s direct engagement with Firefox. Examples include how many + tabs, addons, or windows a user has open; uses of specific Firefox + features; session length, scrolls and clicks; and the status of + discrete user preferences. + + - **Category 3: Web activity data:** (`web_activity`) Information + about user web browsing that could be considered sensitive. Examples + include users’ specific web browsing history; general information + about their web browsing history (such as TLDs or categories of + webpages visited over time); and potentially certain types of + interaction data about specific webpages visited. + + - **Category 4: Highly sensitive data:** (`highly_sensitive`) + Information that directly identifies a person, or if combined with + other data could identify a person. Examples include e-mail, + usernames, identifiers such as google ad id, apple id, fxaccount, + city or country (unless small ones are explicitly filtered out), or + certain cookies. It may be embedded within specific website content, + such as memory contents, dumps, captures of screen data, or DOM + data. + type: array + items: + enum: + - technical + - interaction + - web_activity + - highly_sensitive + type: string + minLength: 1 + uniqueItems: true + + telemetry_mirror: + title: Which probe in Telemetry to mirror this metric's value to. + description: | + The C++ enum form of the Scalar, Event, or Histogram to which we + should mirror values. + Use is limited to Firefox Desktop only. + Has no effect when used with non-FOG outputters. + See FOG's documentation on mirroring for details - + https://firefox-source-docs.mozilla.org/toolkit/components/glean/mirroring.html + type: string + minLength: 6 + + denominator_metric: + title: The name of the denominator for this `rate` metric. + description: | + Denominators for `rate` metrics may be private and internal + or shared and external. + External denominators are `counter` metrics. + This field names the `counter` metric that serves as this + `rate` metric's external denominator. + The named denominator must be defined in this component + so glean_parser can find it. + type: string + + required: + - type + - bugs + - description + - notification_emails + - data_reviews + - expires + +type: object + +propertyNames: + anyOf: + - allOf: + - $ref: "#/definitions/dotted_snake_case" + - not: + description: "'pings' is reserved as a category name." + const: pings + - not: + description: "'tags' is reserved as a category name." + const: tags + - enum: ['$schema', '$tags'] + +properties: + $schema: + type: string + format: url + + no_lint: + title: Lint checks to skip globally + description: | + This parameter lists any lint checks to skip for this whole file. + type: array + items: + type: string + + $tags: + title: Tags that apply to the whole file + description: | + This denotes the list of tags that apply to all metrics in this file. + type: array + items: + type: string + +additionalProperties: + type: object + propertyNames: + anyOf: + - $ref: "#/definitions/short_id" + additionalProperties: + allOf: + - $ref: "#/definitions/metric" + - + if: + properties: + type: + const: event + then: + properties: + lifetime: + description: | + Event metrics must have ping lifetime. 
+ const: ping + - if: + not: + properties: + type: + enum: + - timing_distribution + - custom_distribution + - memory_distribution + - quantity + - boolean + - string + - labeled_counter + then: + properties: + gecko_datapoint: + description: | + `gecko_datapoint` is only allowed for `timing_distribution`, + `custom_distribution`, `memory_distribution`, `quantity`, + `boolean`, `string` and `labeled_counter`. + maxLength: 0 + - + if: + properties: + type: + const: custom_distribution + then: + required: + - range_max + - bucket_count + - histogram_type + description: | + `custom_distribution` is missing required parameters `range_max`, + `bucket_count` and `histogram_type`. + - + if: + properties: + type: + const: memory_distribution + then: + required: + - memory_unit + description: | + `memory_distribution` is missing required parameter `memory_unit`. + - + if: + properties: + type: + const: quantity + then: + required: + - unit + description: | + `quantity` is missing required parameter `unit`. + - + if: + properties: + type: + const: jwe + then: + required: + - jwe_support_was_removed + description: | + JWE support was removed. + If you require this send an email to glean-team@mozilla.com. + - if: + not: + properties: + type: + const: rate + then: + properties: + denominator_metric: + description: | + `denominator_metric` is only allowed for `rate`. + maxLength: 0 + - + if: + properties: + type: + const: text + then: + properties: + lifetime: + description: > + Text metrics must have ping or application lifetime. + enum: + - ping + - application + + data_sensitivity: + description: > + Text metrics require Category 3 (`web_activity`) + or Category 4 (`highly_sensitive`). + type: array + items: + enum: + - web_activity + - highly_sensitive + + send_in_pings: + description: | + Text metrics can only be sent in custom pings. + Built-in pings are not allowed. + type: array + items: + allOf: + - $ref: "#/definitions/kebab_case" + - not: + description: > + Text metrics can only be sent in custom pings. + Built-in pings are not allowed." + pattern: + "^(metrics|baseline|events|deletion-request|default|glean_.*)$" + + - + if: + # This is a schema check: + # This is true when the checked YAML passes the schema validation. + # + # If it has a datetime/timing_distribution/timespan type + # AND has a `unit` property, then... + properties: + type: + enum: + - datetime + - timing_distribution + - timespan + required: + - unit + # ... then `time_unit` is required, + # because that's the only way we can force this to fail. + then: + required: + - time_unit + description: | + This metric type uses the (optional) `time_unit` parameter, + not `unit`. diff --git a/third_party/python/glean_parser/glean_parser/schemas/pings.1-0-0.schema.yaml b/third_party/python/glean_parser/glean_parser/schemas/pings.1-0-0.schema.yaml new file mode 100644 index 0000000000..c15a4c85ac --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/schemas/pings.1-0-0.schema.yaml @@ -0,0 +1,157 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +--- +$schema: http://json-schema.org/draft-07/schema# +title: Pings +description: | + Schema for the pings.yaml files for Mozilla's Glean telemetry SDK. + + The top-level of the `pings.yaml` file has a key defining the name of each + ping. The values contain metadata about that ping. 
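+  As a rough sketch, a minimal (hypothetical) `pings.yaml` entry might look
+  like this; the ping name, URLs and email address are placeholders:
+
+      search:
+        description: Sent when the user completes a search.
+        include_client_id: true
+        bugs:
+          - https://bugzilla.mozilla.org/show_bug.cgi?id=123456
+        data_reviews:
+          - https://example.com/data-review
+        notification_emails:
+          - nobody@example.com
+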
Ping names must be + kebab-case per https://docs.telemetry.mozilla.org/cookbooks/new_ping.html + +$id: moz://mozilla.org/schemas/glean/pings/1-0-0 + +definitions: + dotted_snake_case: + type: string + pattern: "^[a-z_][a-z0-9_]{0,29}(\\.[a-z_][a-z0-9_]{0,29})*$" + maxLength: 40 + kebab_case: + type: string + # Bug 1601270; we allow 3 specific existing snake_cased ping names for now, + # but these special cases can be removed once the number of legacy clients + # sufficiently dwindles, likely in 2020H2. + pattern: "^[a-z][a-z0-9-]{0,29}$\ + |^deletion_request$|^bookmarks_sync$|^history_sync$|^session_end$|^all_pings$|^glean_.*$" + +type: object + +propertyNames: + allOf: + - anyOf: + - $ref: "#/definitions/kebab_case" + - enum: ['$schema', 'no_lint'] + - not: + enum: ['all-pings'] + +properties: + $schema: + type: string + format: url + + no_lint: + title: Lint checks to skip globally + description: | + This parameter lists any lint checks to skip for this whole file. + type: array + items: + type: string + +additionalProperties: + type: object + properties: + description: + title: Description + description: | + **Required.** + + A textual description of the purpose of this ping and what it contains. + + Descriptions may contain [markdown + syntax](https://www.markdownguide.org/basic-syntax/). + type: string + + include_client_id: + title: Include client id + description: | + **Required.** + + When `true`, include the `client_id` value in the ping. + type: boolean + + send_if_empty: + title: Send if empty + description: | + When `false` a ping is sent only if it contains data (the default). + When `true` a ping is sent even if it contains no data. + type: boolean + + notification_emails: + title: Notification emails + description: | + **Required.** + + A list of email addresses to notify for important events with the + ping or when people with context or ownership for the ping need to + be contacted. + type: array + minItems: 1 + items: + type: string + format: email + + bugs: + title: Related bugs + description: | + **Required.** + + A list of bugs (e.g. Bugzilla and Github) that are relevant to this + ping, e.g., tracking its original implementation or later changes to + it. + + If a number, it is an ID to an issue in the default tracker (e.g. + Mozilla's Bugzilla instance). If a string, it must be a URI to a bug + page in a tracker. + type: array + minItems: 1 + items: + anyOf: + - type: integer # Keep supporting integer for backward-compat + - type: string + format: uri + + data_reviews: + title: Review references + description: | + **Required.** + + A list of URIs to any data collection reviews relevant to the ping. + type: array + items: + type: string + format: uri + + reasons: + title: The reasons this ping can be sent. + description: | + A list of reasons that the ping might be triggered. Sent in the ping's + `ping_info.reason` field. + + Specified as a mapping from reason codes (which are short strings), to + a textual description of the reason. + type: object + propertyNames: + type: string + maxLength: 30 + additionalProperties: + type: string + + no_lint: + title: Lint checks to skip + description: | + This parameter lists any lint checks to skip for this metric only. 
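+      For example, `no_lint: [BUG_NUMBER]` would skip a lint check named
+      `BUG_NUMBER` for this ping (the check name here is illustrative).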
+ type: array + items: + type: string + + required: + - description + - include_client_id + - bugs + - notification_emails + - data_reviews + + additionalProperties: false diff --git a/third_party/python/glean_parser/glean_parser/schemas/pings.2-0-0.schema.yaml b/third_party/python/glean_parser/glean_parser/schemas/pings.2-0-0.schema.yaml new file mode 100644 index 0000000000..fb0f9c1914 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/schemas/pings.2-0-0.schema.yaml @@ -0,0 +1,169 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +--- +$schema: http://json-schema.org/draft-07/schema# +title: Pings +description: | + Schema for the pings.yaml files for Mozilla's Glean telemetry SDK. + + The top-level of the `pings.yaml` file has a key defining the name of each + ping. The values contain metadata about that ping. Ping names must be + kebab-case per https://docs.telemetry.mozilla.org/cookbooks/new_ping.html + +$id: moz://mozilla.org/schemas/glean/pings/2-0-0 + +definitions: + dotted_snake_case: + type: string + pattern: "^[a-z_][a-z0-9_]{0,29}(\\.[a-z_][a-z0-9_]{0,29})*$" + maxLength: 40 + # Prior to version 2.0.0 of the schema, special ping names with underscores + # were also supported. + kebab_case: + type: string + pattern: "^[a-z][a-z0-9-]{0,29}$" + +type: object + +propertyNames: + allOf: + - anyOf: + - $ref: "#/definitions/kebab_case" + - enum: ['$schema', 'no_lint'] + - not: + enum: ['all-pings'] + +properties: + $schema: + type: string + format: url + + no_lint: + title: Lint checks to skip globally + description: | + This parameter lists any lint checks to skip for this whole file. + type: array + items: + type: string + +additionalProperties: + type: object + properties: + description: + title: Description + description: | + **Required.** + + A textual description of the purpose of this ping and what it contains. + + Descriptions may contain [markdown + syntax](https://www.markdownguide.org/basic-syntax/). + type: string + + metadata: + title: Metadata + description: | + Additional metadata about this ping. Currently limited to a list of + tags. + type: object + properties: + tags: + title: Tags + description: Which tags are specified for this ping. + type: array + items: + type: string + maxLength: 80 + default: {} + + include_client_id: + title: Include client id + description: | + **Required.** + + When `true`, include the `client_id` value in the ping. + type: boolean + + send_if_empty: + title: Send if empty + description: | + When `false` a ping is sent only if it contains data (the default). + When `true` a ping is sent even if it contains no data. + type: boolean + + notification_emails: + title: Notification emails + description: | + **Required.** + + A list of email addresses to notify for important events with the + ping or when people with context or ownership for the ping need to + be contacted. + type: array + minItems: 1 + items: + type: string + format: email + + bugs: + title: Related bugs + description: | + **Required.** + + A list of bugs (e.g. Bugzilla and Github) that are relevant to this + ping, e.g., tracking its original implementation or later changes to + it. + + It must be a URI to a bug page in a tracker. + + Prior to version 2.0.0 of the schema, bugs could also be integers. 
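+      For example (illustrative; the bug number is a placeholder):
+
+          bugs:
+            - https://bugzilla.mozilla.org/show_bug.cgi?id=123456
+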
+ type: array + minItems: 1 + items: + type: string + format: uri + + data_reviews: + title: Review references + description: | + **Required.** + + A list of URIs to any data collection reviews relevant to the ping. + type: array + items: + type: string + format: uri + + reasons: + title: The reasons this ping can be sent. + description: | + A list of reasons that the ping might be triggered. Sent in the ping's + `ping_info.reason` field. + + Specified as a mapping from reason codes (which are short strings), to + a textual description of the reason. + type: object + propertyNames: + type: string + maxLength: 30 + additionalProperties: + type: string + + no_lint: + title: Lint checks to skip + description: | + This parameter lists any lint checks to skip for this metric only. + type: array + items: + type: string + + required: + - description + - include_client_id + - bugs + - notification_emails + - data_reviews + + additionalProperties: false diff --git a/third_party/python/glean_parser/glean_parser/schemas/tags.1-0-0.schema.yaml b/third_party/python/glean_parser/glean_parser/schemas/tags.1-0-0.schema.yaml new file mode 100644 index 0000000000..aa0f083bcf --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/schemas/tags.1-0-0.schema.yaml @@ -0,0 +1,51 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +--- +$schema: http://json-schema.org/draft-07/schema# +title: Tags +description: | + Schema for the tags.yaml files for Mozilla's Glean telemetry SDK. + + The top-level of the `tags.yaml` file has a key defining the name of each + tag. The values contain metadata about that tag (currently just a + description). + +$id: moz://mozilla.org/schemas/glean/tags/1-0-0 + +type: object + +propertyNames: + type: string + maxLength: 80 + +properties: + $schema: + type: string + format: url + + no_lint: + title: Lint checks to skip globally + description: | + This parameter lists any lint checks to skip for this whole file. + type: array + items: + type: string + +additionalProperties: + type: object + properties: + description: + title: Description + description: | + **Required.** + + A textual description of this tag. + + Descriptions may contain [markdown + syntax](https://www.markdownguide.org/basic-syntax/). + type: string + required: + - description + additionalProperties: false diff --git a/third_party/python/glean_parser/glean_parser/swift.py b/third_party/python/glean_parser/glean_parser/swift.py new file mode 100644 index 0000000000..c745c4d9ac --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/swift.py @@ -0,0 +1,260 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Outputter to generate Swift code for metrics. +""" + +import enum +import json +from pathlib import Path +from typing import Any, Dict, Optional, Union + +from . import __version__ +from . import metrics +from . import pings +from . import tags +from . import util + +# An (imcomplete) list of reserved keywords in Swift. +# These will be replaced in generated code by their escaped form. +SWIFT_RESERVED_NAMES = ["internal", "typealias"] + + +def swift_datatypes_filter(value: util.JSONType) -> str: + """ + A Jinja2 filter that renders Swift literals. 
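+    As a rough illustration: a Python dict such as `{"category": None}`
+    renders as `["category": nil]`, and an enum member such as
+    `Lifetime.ping` renders as `.ping`.
+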
+ + Based on Python's JSONEncoder, but overrides: + - dicts to use `[key: value]` + - sets to use `[...]` + - enums to use the like-named Swift enum + - Rate objects to a CommonMetricData initializer + (for external Denominators' Numerators lists) + """ + + class SwiftEncoder(json.JSONEncoder): + def iterencode(self, value): + if isinstance(value, dict): + yield "[" + first = True + for key, subvalue in value.items(): + if not first: + yield ", " + yield from self.iterencode(key) + yield ": " + yield from self.iterencode(subvalue) + first = False + yield "]" + elif isinstance(value, enum.Enum): + yield ("." + util.camelize(value.name)) + elif isinstance(value, list): + yield "[" + first = True + for subvalue in value: + if not first: + yield ", " + yield from self.iterencode(subvalue) + first = False + yield "]" + elif isinstance(value, set): + yield "[" + first = True + for subvalue in sorted(list(value)): + if not first: + yield ", " + yield from self.iterencode(subvalue) + first = False + yield "]" + elif value is None: + yield "nil" + elif isinstance(value, metrics.Rate): + yield "CommonMetricData(" + first = True + for arg_name in util.common_metric_args: + if hasattr(value, arg_name): + if not first: + yield ", " + yield f"{util.camelize(arg_name)}: " + yield from self.iterencode(getattr(value, arg_name)) + first = False + yield ")" + else: + yield from super().iterencode(value) + + return "".join(SwiftEncoder().iterencode(value)) + + +def type_name(obj: Union[metrics.Metric, pings.Ping]) -> str: + """ + Returns the Swift type to use for a given metric or ping object. + """ + generate_enums = getattr(obj, "_generate_enums", []) + if len(generate_enums): + generic = None + for member, suffix in generate_enums: + if len(getattr(obj, member)): + generic = util.Camelize(obj.name) + suffix + else: + if isinstance(obj, metrics.Event): + generic = "NoExtras" + else: + generic = "No" + suffix + + return "{}<{}>".format(class_name(obj.type), generic) + + return class_name(obj.type) + + +def extra_type_name(typ: str) -> str: + """ + Returns the corresponding Kotlin type for event's extra key types. + """ + + if typ == "boolean": + return "Bool" + elif typ == "string": + return "String" + elif typ == "quantity": + return "Int32" + else: + return "UNSUPPORTED" + + +def class_name(obj_type: str) -> str: + """ + Returns the Swift class name for a given metric or ping type. + """ + if obj_type == "ping": + return "Ping" + if obj_type.startswith("labeled_"): + obj_type = obj_type[8:] + return util.Camelize(obj_type) + "MetricType" + + +def variable_name(var: str) -> str: + """ + Returns a valid Swift variable name, escaping keywords if necessary. + """ + if var in SWIFT_RESERVED_NAMES: + return "`" + var + "`" + else: + return var + + +class BuildInfo: + def __init__(self, build_date): + self.build_date = build_date + + +def generate_build_date(date: Optional[str]) -> str: + """ + Generate the build timestamp. + """ + + ts = util.build_date(date) + + data = [ + ("year", ts.year), + ("month", ts.month), + ("day", ts.day), + ("hour", ts.hour), + ("minute", ts.minute), + ("second", ts.second), + ] + + # The internal DatetimeMetricType API can take a `DateComponents` object, + # which lets us easily specify the timezone. 
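+    # For example (illustrative values only), a `build_date` option of
+    # `2022-01-03T17:30:00` produces roughly:
+    #   DateComponents(calendar: Calendar.current,
+    #                  timeZone: TimeZone(abbreviation: "UTC"),
+    #                  year: 2022, month: 1, day: 3,
+    #                  hour: 17, minute: 30, second: 0)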
+ components = ", ".join([f"{name}: {val}" for (name, val) in data]) + return f'DateComponents(calendar: Calendar.current, timeZone: TimeZone(abbreviation: "UTC"), {components})' # noqa + + +class Category: + """ + Data struct holding information about a metric to be used in the template. + """ + + name: str + objs: Dict[str, Union[metrics.Metric, pings.Ping, tags.Tag]] + contains_pings: bool + + +def output_swift( + objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]] = None +) -> None: + """ + Given a tree of objects, output Swift code to `output_dir`. + + :param objects: A tree of objects (metrics and pings) as returned from + `parser.parse_objects`. + :param output_dir: Path to an output directory to write to. + :param options: options dictionary, with the following optional keys: + - namespace: The namespace to generate metrics in + - glean_namespace: The namespace to import Glean from + - allow_reserved: When True, this is a Glean-internal build + - with_buildinfo: If "true" the `GleanBuildInfo` is generated. + Otherwise generation of that file is skipped. + Defaults to "true". + - build_date: If set to `0` a static unix epoch time will be used. + If set to a ISO8601 datetime string (e.g. `2022-01-03T17:30:00`) + it will use that date. + Other values will throw an error. + If not set it will use the current date & time. + """ + if options is None: + options = {} + + template = util.get_jinja2_template( + "swift.jinja2", + filters=( + ("swift", swift_datatypes_filter), + ("type_name", type_name), + ("class_name", class_name), + ("variable_name", variable_name), + ("extra_type_name", extra_type_name), + ), + ) + + namespace = options.get("namespace", "GleanMetrics") + glean_namespace = options.get("glean_namespace", "Glean") + with_buildinfo = options.get("with_buildinfo", "true").lower() == "true" + build_date = options.get("build_date", None) + build_info = None + if with_buildinfo: + build_date = generate_build_date(build_date) + build_info = BuildInfo(build_date=build_date) + + filename = "Metrics.swift" + filepath = output_dir / filename + categories = [] + + for category_key, category_val in objs.items(): + contains_pings = any( + isinstance(obj, pings.Ping) for obj in category_val.values() + ) + + cat = Category() + cat.name = category_key + cat.objs = category_val + cat.contains_pings = contains_pings + + categories.append(cat) + + with filepath.open("w", encoding="utf-8") as fd: + fd.write( + template.render( + parser_version=__version__, + categories=categories, + common_metric_args=util.common_metric_args, + extra_metric_args=util.extra_metric_args, + namespace=namespace, + glean_namespace=glean_namespace, + allow_reserved=options.get("allow_reserved", False), + build_info=build_info, + ) + ) + # Jinja2 squashes the final newline, so we explicitly add it + fd.write("\n") diff --git a/third_party/python/glean_parser/glean_parser/tags.py b/third_party/python/glean_parser/glean_parser/tags.py new file mode 100644 index 0000000000..680b99731b --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/tags.py @@ -0,0 +1,49 @@ +from typing import Dict, List, Optional +from . import util + + +class Tag: + def __init__( + self, + name: str, + description: str, + defined_in: Optional[Dict] = None, + no_lint: Optional[List[str]] = None, + _validated: bool = False, + ): + # Avoid cyclical import + from . 
import parser + + self.name = name + self.description = description + self.defined_in = defined_in + if no_lint is None: + no_lint = [] + self.no_lint = no_lint + + # _validated indicates whether this tag has already been jsonschema + # validated (but not any of the Python-level validation). + if not _validated: + data: Dict[str, util.JSONType] = { + "$schema": parser.TAGS_ID, + self.name: self._serialize_input(), + } + for error in parser.validate(data): + raise ValueError(error) + + @property + def type(self) -> str: + return "tag" + + def _serialize_input(self) -> Dict[str, util.JSONType]: + d = self.serialize() + modified_dict = util.remove_output_params(d, "defined_in") + return modified_dict + + def serialize(self) -> Dict[str, util.JSONType]: + """ + Serialize the tag back to JSON object model. + """ + d = self.__dict__.copy() + del d["name"] + return d diff --git a/third_party/python/glean_parser/glean_parser/templates/data_review.jinja2 b/third_party/python/glean_parser/glean_parser/templates/data_review.jinja2 new file mode 100644 index 0000000000..b3541805ed --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/templates/data_review.jinja2 @@ -0,0 +1,82 @@ +!! Reminder: it is your responsibility to complete and check the correctness of +!! this automatically-generated request skeleton before requesting Data +!! Collection Review. See https://wiki.mozilla.org/Data_Collection for details. +{# Data Review Request Template pulled from + https://github.com/mozilla/data-review/blob/main/request.md #} + +DATA REVIEW REQUEST +1. What questions will you answer with this data? + +{{ "TODO: Fill this in." if not questions }} + +2. Why does Mozilla need to answer these questions? Are there benefits for users? + Do we need this information to address product or business requirements? + +{{ "TODO: Fill this in." if not why }} + +3. What alternative methods did you consider to answer these questions? + Why were they not sufficient? + +{{ "TODO: Fill this in." if not methods }} + +4. Can current instrumentation answer these questions? + +{{ "TODO: Fill this in." if not current_instrumentation_answers }} + +5. List all proposed measurements and indicate the category of data collection for each + measurement, using the Firefox data collection categories found on the Mozilla wiki. + +Measurement Name | Measurement Description | Data Collection Category | Tracking Bug +---------------- | ----------------------- | ------------------------ | ------------ +{% for metric in metrics %} +{% if metric.type == "event" and metric.allowed_extra_keys %} +{% for extra_name, extra_detail in metric.extra_keys.items() %} +`{{ metric.category|snake_case }}.{{ metric.name|snake_case }}#{{ extra_name }} | {{ extra_detail["description"]|replace("\n", " ") }} | {{ metric.data_sensitivity|join(", ", attribute="name") }} | {{ metric.bugs|last }} +{% endfor %} +{% else %} +`{{ metric.category|snake_case }}.{{ metric.name|snake_case }}` | {{ metric.description|replace("\n", " ") }} | {{ metric.data_sensitivity|join(", ", attribute="name") }} | {{ metric.bugs|last }} +{% endif %} +{% endfor %} + +6. Please provide a link to the documentation for this data collection which + describes the ultimate data set in a public, complete, and accurate way. + +This collection is Glean so is documented [in the Glean Dictionary](https://dictionary.telemetry.mozilla.org). + +7. How long will this data be collected? 
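+{# `durations` is expected to be the set of distinct `expires` values
+   gathered from the metrics under review (e.g. "never" or an expiry date),
+   as supplied by the data_review outputter. #}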
+ +{% if durations|length == 1 %} +{% for duration in durations %} +{% if duration == "never" %} +This collection will be collected permanently. +{% else %} +This collection has expiry '{{duration}}'. +{% endif %} +{% endfor %} +{% else %} +Parts of this collection expire at different times: {{ durations|join(", ") }}. +{% endif %} +{% if "never" in durations %} +{{ responsible_emails|join(", ") }} will be responsible for the permanent collections. +{% endif %} + +8. What populations will you measure? + +All channels, countries, and locales. No filters. + +9. If this data collection is default on, what is the opt-out mechanism for users? + +These collections are Glean. The opt-out can be found in the product's preferences. + +10. Please provide a general description of how you will analyze this data. + +{{ "TODO: Fill this in." if not analysis_how }} + +11. Where do you intend to share the results of your analysis? + +{{ "TODO: Fill this in." if not analysis_where }} + +12. Is there a third-party tool (i.e. not Glean or Telemetry) that you + are proposing to use for this data collection? + +No. diff --git a/third_party/python/glean_parser/glean_parser/templates/javascript.buildinfo.jinja2 b/third_party/python/glean_parser/glean_parser/templates/javascript.buildinfo.jinja2 new file mode 100644 index 0000000000..79968d3d0a --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/templates/javascript.buildinfo.jinja2 @@ -0,0 +1,11 @@ +/* + * AUTOGENERATED BY glean_parser v{{ parser_version }}. DO NOT EDIT. DO NOT COMMIT. + */ +{# The rendered markdown is autogenerated, but this +Jinja2 template is not. Please file bugs! #} + +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +{% if platform != "qt" %}export {% endif %}const buildDate = {{ build_date }}; diff --git a/third_party/python/glean_parser/glean_parser/templates/javascript.jinja2 b/third_party/python/glean_parser/glean_parser/templates/javascript.jinja2 new file mode 100644 index 0000000000..4036e8922f --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/templates/javascript.jinja2 @@ -0,0 +1,73 @@ +{# The final Javascript/Typescript code is autogenerated, but this +Jinja2 template is not. Please file bugs! #} +{% macro obj_declaration(obj) %} +new {{ obj.type|class_name }}{% if obj.extra_keys and lang == "typescript" %}<{ + {% for name, type in obj.allowed_extra_keys_with_types %} + {{ name }}?: {{ type|extra_type_name }}, + {% endfor %} +}>{% endif %}({ + {% for arg_name in (obj.type|args).common if obj[arg_name] is defined %} + {{ arg_name|camelize }}: {{ obj[arg_name]|js }}, + {% endfor %} +}{% for arg_name in (obj.type|args).extra if obj[arg_name] is defined %}, {{ obj[arg_name]|js }}{% endfor %}){% endmacro %} +{% macro labeled_obj_declaration(obj) %} +new {{ "labeled"|class_name }}({ + {% for arg_name in (obj.type|args).common if obj[arg_name] is defined %} + {{ arg_name|camelize }}: {{ obj[arg_name]|js }}, + {% endfor %} +}, {{ obj.type|class_name }}{% if obj.labels is not none %}, {{ obj.labels|js }}{% endif %}){% endmacro %} +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// AUTOGENERATED BY glean_parser v{{ parser_version }}. DO NOT EDIT. DO NOT COMMIT. 
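+{# As a rough illustration, for a hypothetical `browser.search.count`
+   counter metric this template generates output along these lines
+   (non-Qt platform):
+
+   import CounterMetricType from "@mozilla/glean/private/metrics/counter";
+
+   export const count = new CounterMetricType({
+       category: "browser.search",
+       name: "count",
+       sendInPings: ["metrics"],
+       lifetime: "ping",
+       disabled: false,
+   });
+#}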
+ +{% if platform != "qt" %} +{% if has_labeled_metrics %} +import LabeledMetricType from "@mozilla/glean/private/metrics/labeled"; +{% endif %} +{% for type in types %} +import {{ type|class_name }} from "@mozilla/glean/private/{{ type|import_path }}"; +{% endfor %} +{% else %} +.import org.mozilla.Glean {{ version }} as Glean +{% endif %} + +{% for obj in objs.values() %} +/** + * {{ obj.description|wordwrap() | replace("\n", "\n * ") }} + * + * Generated from `{{ obj.identifier() }}`. + */ +{% if obj.labeled %} +{% if platform != "qt" %}export {% endif %}const {{ obj.name|camelize }} = {{ labeled_obj_declaration(obj) }}; +{% else %} +{% if platform != "qt" %}export {% endif %}const {{ obj.name|camelize }} = {{ obj_declaration(obj) }}; +{% endif %} + +{% if obj|attr("_generate_enums") %} +{% for name, suffix in obj["_generate_enums"] %} +{% if obj|attr(name)|length and name == "reason_codes" %} +/** + * Reason codes for `{{ obj.identifier() }}`. + * + * @readonly + * @enum {string} + */ +{% if lang == "typescript" %} +export enum {{ obj.name|Camelize }}{{ name|Camelize }} { + {% for key in obj|attr(name) %} + {{ key|Camelize }} = "{{ key }}", + {% endfor %} +} +{% else %} +{% if platform != "qt" %}export {% endif %}const {{ obj.name|Camelize }}{{ name|Camelize }} = { + {% for key in obj|attr(name) %} + "{{ key|Camelize }}": "{{ key }}", + {% endfor %} +} +{% endif %} +{% endif %} +{% endfor %} +{% endif %} +{% endfor %} diff --git a/third_party/python/glean_parser/glean_parser/templates/kotlin.buildinfo.jinja2 b/third_party/python/glean_parser/glean_parser/templates/kotlin.buildinfo.jinja2 new file mode 100644 index 0000000000..2e0db5b302 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/templates/kotlin.buildinfo.jinja2 @@ -0,0 +1,31 @@ +// -*- mode: kotlin -*- + +/* + * AUTOGENERATED BY glean_parser v{{ parser_version }}. DO NOT EDIT. DO NOT COMMIT. + */ +{# The rendered markdown is autogenerated, but this +Jinja2 template is not. Please file bugs! #} + +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +@file:Suppress("PackageNaming", "MaxLineLength") + +package {{ namespace }} + +import java.util.Calendar +import java.util.TimeZone +import {{ glean_namespace }}.BuildInfo +import {{ namespace_package }}.BuildConfig + +@Suppress("MagicNumber") +internal object GleanBuildInfo { + val buildInfo: BuildInfo by lazy { + BuildInfo( + versionCode = BuildConfig.VERSION_CODE.toString(), + versionName = BuildConfig.VERSION_NAME, + buildDate = {{ build_date }} + ) + } +} diff --git a/third_party/python/glean_parser/glean_parser/templates/kotlin.geckoview.jinja2 b/third_party/python/glean_parser/glean_parser/templates/kotlin.geckoview.jinja2 new file mode 100644 index 0000000000..f58c788e93 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/templates/kotlin.geckoview.jinja2 @@ -0,0 +1,124 @@ +// -*- mode: kotlin -*- + +/* + * AUTOGENERATED BY glean_parser v{{ parser_version }}. DO NOT EDIT. DO NOT COMMIT. + */ +{# The rendered markdown is autogenerated, but this +Jinja2 template is not. Please file bugs! #} + +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +@file:Suppress("PackageNaming", "MaxLineLength") +package {{ namespace }} + +import {{ glean_namespace }}.private.BooleanMetricType // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.CounterMetricType // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.HistogramMetricBase // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.LabeledMetricType // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.QuantityMetricType // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.StringMetricType // ktlint-disable import-ordering no-unused-imports + +/* + * This class performs the mapping between Gecko metrics and Glean SDK + * metric types. + */ +internal object GleanGeckoMetricsMapping { + // Support exfiltration of Gecko histograms from products using both the + // Glean SDK and GeckoView. See bug 1566356 for more context. + @Suppress("UNUSED_PARAMETER") + fun getHistogram(geckoMetricName: String): HistogramMetricBase? { + {% if 'histograms' in gecko_metrics %} + return when (geckoMetricName) { + {% for category in gecko_metrics['histograms'].keys()|sort %} + // From {{ category|Camelize }}.kt + {% for metric in gecko_metrics['histograms'][category] %} + "{{ metric.gecko_datapoint }}" -> {{ category|Camelize }}.{{ metric.name|camelize }} + {% endfor %} + {%- endfor %} + else -> null + } + {% else %} + return null + {% endif %} + } + + // Support exfiltration of Gecko categorical histograms from products using + // both the Glean SDK and GeckoView. See bug 1571740 for more context. + @Suppress("UNUSED_PARAMETER") + fun getCategoricalMetric( + geckoMetricName: String + ): LabeledMetricType? { + {% if 'categoricals' in gecko_metrics %} + return when (geckoMetricName) { + {% for category in gecko_metrics['categoricals'].keys()|sort %} + // From {{ category|Camelize }}.kt + {% for metric in gecko_metrics['categoricals'][category] %} + "{{ metric.gecko_datapoint }}" -> {{ category|Camelize }}.{{ metric.name|camelize }} + {% endfor %} + {%- endfor %} + else -> null + } + {% else %} + return null + {% endif %} + } + + // Support exfiltration of Gecko boolean scalars from products using both the + // Glean SDK and GeckoView. See bug 1579365 for more context. + @Suppress("UNUSED_PARAMETER") + fun getBooleanScalar(geckoMetricName: String): BooleanMetricType? { + {% if 'boolean' in gecko_metrics %} + return when (geckoMetricName) { + {% for category in gecko_metrics['boolean'].keys()|sort %} + // From {{ category|Camelize }}.kt + {% for metric in gecko_metrics['boolean'][category] %} + "{{ metric.gecko_datapoint }}" -> {{ category|Camelize }}.{{ metric.name|camelize }} + {% endfor %} + {%- endfor %} + else -> null + } + {% else %} + return null + {% endif %} + } + + // Support exfiltration of Gecko string scalars from products using both the + // Glean SDK and GeckoView. See bug 1579365 for more context. + @Suppress("UNUSED_PARAMETER") + fun getStringScalar(geckoMetricName: String): StringMetricType? 
{ + {% if 'string' in gecko_metrics %} + return when (geckoMetricName) { + {% for category in gecko_metrics['string'].keys()|sort %} + // From {{ category|Camelize }}.kt + {% for metric in gecko_metrics['string'][category] %} + "{{ metric.gecko_datapoint }}" -> {{ category|Camelize }}.{{ metric.name|camelize }} + {% endfor %} + {%- endfor %} + else -> null + } + {% else %} + return null + {% endif %} + } + + // Support exfiltration of Gecko quantity scalars from products using both the + // Glean SDK and GeckoView. See bug 1579365 for more context. + @Suppress("UNUSED_PARAMETER") + fun getQuantityScalar(geckoMetricName: String): QuantityMetricType? { + {% if 'quantity' in gecko_metrics %} + return when (geckoMetricName) { + {% for category in gecko_metrics['quantity'].keys()|sort %} + // From {{ category|Camelize }}.kt + {% for metric in gecko_metrics['quantity'][category] %} + "{{ metric.gecko_datapoint }}" -> {{ category|Camelize }}.{{ metric.name|camelize }} + {% endfor %} + {%- endfor %} + else -> null + } + {% else %} + return null + {% endif %} + } +} diff --git a/third_party/python/glean_parser/glean_parser/templates/kotlin.jinja2 b/third_party/python/glean_parser/glean_parser/templates/kotlin.jinja2 new file mode 100644 index 0000000000..bd800af01d --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/templates/kotlin.jinja2 @@ -0,0 +1,133 @@ +// -*- mode: kotlin -*- + +/* + * AUTOGENERATED BY glean_parser v{{ parser_version }}. DO NOT EDIT. DO NOT COMMIT. + */ +{# The rendered markdown is autogenerated, but this +Jinja2 template is not. Please file bugs! #} + +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +{%- macro obj_declaration(obj, suffix='', access='', lazy=False) -%} +{% if (access != "private ") -%} +@get:JvmName("{{ obj.name|camelize }}{{ suffix }}") +{% endif -%} +{{ access }}val {{ obj.name|camelize }}{{ suffix }}: {{ obj|type_name }}{% if lazy %} by lazy { {%- else %} ={% endif %} // generated from {{ obj.identifier() }} +{% if obj.type == 'ping' %} + {{ obj|type_name }}( + {% for arg_name in ping_args if obj[arg_name] is defined %} + {{ arg_name|camelize }} = {{ obj[arg_name]|kotlin }}{{ "," if not loop.last }} + {% endfor %} + ) +{% else %} + {{ obj|type_name }}( + CommonMetricData( + {% for arg_name in common_metric_args if obj[arg_name] is defined %} + {{ arg_name|camelize }} = {{ obj[arg_name]|kotlin }}{{ "," if not loop.last }} + {% endfor %} + ){%- for arg_name in extra_metric_args if obj[arg_name] is defined -%} + , {{ arg_name|camelize }} = {{ obj[arg_name]|kotlin }} + {%- endfor -%} + ) +{% endif %} +{% if lazy %}}{% endif %} +{%- endmacro -%} + +{%- macro reason_enum_decl(obj, name, suffix) -%} +@Suppress("ClassNaming", "EnumNaming") +enum class {{ obj.name|camelize }}{{ suffix }} : ReasonCode { +{% for key in obj|attr(name) %} + {{ key|camelize }} { + override fun code(): Int = {{ loop.index-1 }} + }{{ "," if not loop.last }}{{ ";" if loop.last }} + +{% endfor %} +} +{%- endmacro %} + +{%- macro struct_decl(obj, name, suffix) -%} +@Suppress("ClassNaming", "EnumNaming") +data class {{ obj.name|Camelize }}{{ suffix }}( +{% for item, typ in obj|attr(name) %} + val {{ item|camelize }}: {{typ|extra_type_name}}? 
= null{{ "," if not loop.last }} +{% endfor %} +) : EventExtras { + override fun toExtraRecord(): Map { + val map = mutableMapOf() + + {% for item in obj|attr(name) %} + this.{{ item[0]|camelize }}?.let { + map.put("{{item[0]}}", it.toString()) + } + {% endfor %} + return map + } +} +{%- endmacro -%} + +/* ktlint-disable no-blank-line-before-rbrace */ +@file:Suppress("PackageNaming", "MaxLineLength") +package {{ namespace }} + +import {{ glean_namespace }}.private.CommonMetricData // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.EventExtras // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.HistogramType // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.Lifetime // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.MemoryUnit // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.NoExtras // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.ReasonCode // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.NoReasonCodes // ktlint-disable import-ordering no-unused-imports +import {{ glean_namespace }}.private.TimeUnit // ktlint-disable import-ordering no-unused-imports +{% for obj_type in obj_types %} +import {{ glean_namespace }}.private.{{ obj_type }} // ktlint-disable import-ordering +{% endfor %} +{% if has_labeled_metrics %} +import {{ glean_namespace }}.private.LabeledMetricType // ktlint-disable import-ordering +{% endif %} + +internal object {{ category_name|Camelize }} { +{% for obj in objs.values() %} + {% if obj.type == "ping" %} + {% if obj|attr("_generate_enums") %} + {% for name, suffix in obj["_generate_enums"] %} + {% if obj|attr(name)|length %} + {{ reason_enum_decl(obj, name, suffix)|indent }} + {% endif %} + {% endfor %} + {% endif %} + {% else %} + {% if obj|attr("_generate_enums") %} + {% for name, suffix in obj["_generate_enums"] %} + {% if obj|attr(name)|length %} + {{ struct_decl(obj, name, suffix)|indent }} + {% endif %} + {% endfor %} + {% endif %} + {% endif %} +{% endfor %} +{% for obj in objs.values() %} + {% if obj.labeled %} + {{ obj_declaration(obj, 'Label', 'private ') | indent }} + /** + * {{ obj.description|wordwrap() | replace('\n', '\n * ') }} + */ + val {{ obj.name|camelize }}: LabeledMetricType<{{ obj|type_name }}> by lazy { // generated from {{ obj.identifier() }} + LabeledMetricType( + category = {{ obj.category|kotlin }}, + name = {{ obj.name|kotlin }}, + subMetric = {{ obj.name|camelize }}Label, + disabled = {{ obj.is_disabled()|kotlin }}, + lifetime = {{ obj.lifetime|kotlin }}, + sendInPings = {{ obj.send_in_pings|kotlin }}, + labels = {{ obj.labels|kotlin }} + ) + } + {% else %} + /** + * {{ obj.description|wordwrap() | replace('\n', '\n * ') }} + */ + {{ obj_declaration(obj, lazy=obj.type != 'ping') | indent }} + {% endif %} +{%- endfor %} +} diff --git a/third_party/python/glean_parser/glean_parser/templates/markdown.jinja2 b/third_party/python/glean_parser/glean_parser/templates/markdown.jinja2 new file mode 100644 index 0000000000..9370311247 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/templates/markdown.jinja2 @@ -0,0 +1,98 @@ + +{# The rendered markdown is autogenerated, but this +Jinja2 template is not. Please file bugs! 
#} + +# Metrics + +This document enumerates the metrics collected by {{ project_title }} using the [Glean SDK](https://mozilla.github.io/glean/book/index.html). +This project may depend on other projects which also collect metrics. +This means you might have to go searching through the dependency tree to get a full picture of everything collected by this project. +{% if introduction_extra %} + +{{ introduction_extra }} +{% endif %} + +# Pings + +{% for ping_name in metrics_by_pings.keys()|sort %} +- [{{ ping_name }}]({{ '#' }}{{ ping_name|replace(" ","-") }}) +{% endfor %} + +{% for ping_name in metrics_by_pings.keys()|sort %} +{% raw %}##{% endraw %} {{ ping_name }} + +{% if ping_name|ping_desc and ping_name|ping_desc|length > 0 %} +{{ ping_name|ping_desc }} + +{% if ping_name|ping_docs|length > 0 %} +See the Glean SDK documentation for the [`{{ ping_name }}` ping]({{ ping_name|ping_docs }}). + +{% endif %} +{% endif %} +{% if ping_name|ping_send_if_empty %} +This ping is sent if empty. + +{% endif %} +{% if ping_name|ping_include_client_id %} +This ping includes the [client id](https://mozilla.github.io/glean/book/user/pings/index.html#the-client_info-section). + +{% endif %} +{% if ping_name|ping_data_reviews %} +**Data reviews for this ping:** + +{% for review in ping_name|ping_data_reviews %} +- <{{review}}> +{% endfor %} + +{% endif %} +{% if ping_name|ping_bugs %} +**Bugs related to this ping:** + +{% for bug in ping_name|ping_bugs %} +- {% if bug|int != 0 %}{{bug}}{% else %}<{{bug}}>{% endif %} + +{% endfor %} + +{% endif %} +{% if ping_name|ping_reasons %} +**Reasons this ping may be sent:** + +{% for (reason, desc) in ping_name|ping_reasons|dictsort %} +- `{{ reason }}`: {{ desc|indent(6, first=False) }} +{% endfor %} + +{% endif %} +All Glean pings contain built-in metrics in the [`ping_info`](https://mozilla.github.io/glean/book/user/pings/index.html#the-ping_info-section) and [`client_info`](https://mozilla.github.io/glean/book/user/pings/index.html#the-client_info-section) sections. + +{% if metrics_by_pings[ping_name] %} +In addition to those built-in metrics, the following metrics are added to the ping: + +| Name | Type | Description | Data reviews | Extras | Expiration | [Data Sensitivity](https://wiki.mozilla.org/Firefox/Data_Collection) | +| --- | --- | --- | --- | --- | --- | --- | +{% for metric in metrics_by_pings[ping_name] %} +| {{ metric.identifier() }} | +{{- '['}}{{ metric.type }}]({{ metric.type|metrics_docs }}) | +{{- metric.description|replace("\n", " ") }} | +{%- for data_review in metric.data_reviews %} +[{{ data_review|ping_review_title(loop.index) }}]({{ data_review }}){{ ", " if not loop.last }} +{%- endfor -%} | +{%- if metric|extra_info -%} +
    +{%- for property, desc in metric|extra_info %} +
+<li>{{ property }}{%- if desc is not none -%}: {{ desc|replace("\n", " ") }}{%- endif -%}</li>
  • +{%- endfor -%} +
+{%- endif -%} | +{{- metric.expires }} | +{{- metric.data_sensitivity|data_sensitivity_numbers }} | +{% endfor %} +{% else %} +This ping contains no metrics. +{% endif %} + +{% endfor %} +Data categories are [defined here](https://wiki.mozilla.org/Firefox/Data_Collection). + + +{# The rendered markdown is autogenerated, but this +Jinja2 template is not. Please file bugs! #} diff --git a/third_party/python/glean_parser/glean_parser/templates/qmldir.jinja2 b/third_party/python/glean_parser/glean_parser/templates/qmldir.jinja2 new file mode 100644 index 0000000000..f511912808 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/templates/qmldir.jinja2 @@ -0,0 +1,4 @@ +{% for category in categories|sort %} +{{ category|Camelize }} {{ version }} {{ category|camelize }}.js +{% endfor %} +depends org.mozilla.Glean {{ version }} diff --git a/third_party/python/glean_parser/glean_parser/templates/rust.jinja2 b/third_party/python/glean_parser/glean_parser/templates/rust.jinja2 new file mode 100644 index 0000000000..aff78d47ed --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/templates/rust.jinja2 @@ -0,0 +1,276 @@ +// -*- mode: Rust -*- + +// AUTOGENERATED BY glean_parser v{{ parser_version }}. DO NOT EDIT. DO NOT COMMIT. +{# The rendered source is autogenerated, but this +Jinja2 template is not. Please file bugs! #} + +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +{% macro generate_extra_keys(obj) %} +{% for name, _ in obj["_generate_enums"] %} +{# we always use the `extra` suffix, because we only expose the new event API #} +{% set suffix = "Extra" %} +{% if obj|attr(name)|length %} + {{ extra_keys_with_types(obj, name, suffix)|indent }} +{% endif %} +{% endfor %} +{% endmacro %} +{% macro extra_keys_with_types(obj, name, suffix) %} +#[derive(Default, Debug, Clone, Hash, Eq, PartialEq)] +pub struct {{ obj.name|Camelize }}{{ suffix }} { + {% for item, type in obj|attr(name) %} + pub {{ item|snake_case }}: Option<{{type|extra_type_name}}>, + {% endfor %} +} + +impl ExtraKeys for {{ obj.name|Camelize }}{{ suffix }} { + const ALLOWED_KEYS: &'static [&'static str] = {{ obj.allowed_extra_keys|extra_keys }}; + + fn into_ffi_extra(self) -> ::std::collections::HashMap<::std::string::String, ::std::string::String> { + let mut map = ::std::collections::HashMap::new(); + {% for key, _ in obj|attr(name) %} + self.{{key|snake_case}}.and_then(|val| map.insert("{{key}}".to_string(), val)); + {% endfor %} + map + } +} +{% endmacro %} +{% for category in categories %} +{% if category.contains_pings %} +{% for obj in category.objs.values() %} +#[allow(non_upper_case_globals, dead_code)] +/// {{ obj.description|wordwrap() | replace('\n', '\n/// ') }} +#[rustfmt::skip] +pub static {{ obj.name|snake_case }}: ::glean::private::__export::Lazy<::glean::private::PingType> = + ::glean::private::__export::Lazy::new(|| ::glean::private::PingType::new("{{ obj.name }}", {{ obj.include_client_id|rust }}, {{ obj.send_if_empty|rust }}, {{ obj.reason_codes|rust }})); +{% endfor %} +{% else %} +pub mod {{ category.name|snake_case }} { + #[allow(unused_imports)] // HistogramType might be unusued, let's avoid warnings + use glean::{private::*, traits::ExtraKeys, traits::NoExtraKeys, CommonMetricData, HistogramType, Lifetime, TimeUnit, MemoryUnit}; + {% for obj in category.objs.values() %} + + {% if obj|attr("_generate_enums") %} +{{ 
generate_extra_keys(obj) }} + {%- endif %} + #[allow(non_upper_case_globals, dead_code)] + /// generated from {{ category.name }}.{{ obj.name }} + /// + /// {{ obj.description|wordwrap() | replace('\n', '\n /// ') }} + pub static {{ obj.name|snake_case }}: ::glean::private::__export::Lazy<{{ obj|type_name }}> = ::glean::private::__export::Lazy::new(|| { + {{ obj|ctor }}(CommonMetricData { + category: {{ obj.category|rust }}, + name: {{ obj.name|rust }}, + send_in_pings: {{ obj.send_in_pings|rust }}, + lifetime: {{ obj.lifetime|rust }}, + disabled: {{ obj.is_disabled()|rust }}, + ..Default::default() + } + {%- for arg_name in extra_metric_args if obj[arg_name] is defined and arg_name != 'allowed_extra_keys' -%} + , {{ obj[arg_name]|rust }} + {%- endfor -%} + {{ ", " if obj.labeled else ")\n" }} + {%- if obj.labeled -%} + {%- if obj.labels -%} + Some({{ obj.labels|rust }}) + {%- else -%} + None + {%- endif -%}) + {% endif %} + }); + {% endfor %} +} +{% endif %} +{% endfor %} +{% if metric_by_type|length > 0 %} + +#[allow(dead_code)] +pub(crate) mod __glean_metric_maps { + use std::collections::HashMap; + + use super::{id_for_extra_key, extra_keys_len}; + use crate::private::*; + +{% for typ, metrics in metric_by_type.items() %} + pub static {{typ.0}}: ::glean::private::__export::Lazy>> = ::glean::private::__export::Lazy::new(|| { + let mut map = HashMap::with_capacity({{metrics|length}}); + {% for metric in metrics %} + map.insert({{metric.0}}.into(), &super::{{metric.1}}); + {% endfor %} + map + }); + +{% endfor %} + + /// Wrapper to record an event based on its metric ID. + /// + /// # Arguments + /// + /// * `metric_id` - The metric's ID to look up + /// * `extra` - An map of (extra key id, string) pairs. + /// The map will be decoded into the appropriate `ExtraKeys` type. + /// # Returns + /// + /// Returns `Ok(())` if the event was found and `record` was called with the given `extra`, + /// or an `EventRecordingError::InvalidId` if no event by that ID exists + /// or an `EventRecordingError::InvalidExtraKey` if the `extra` map could not be deserialized. + pub(crate) fn record_event_by_id(metric_id: u32, extra: HashMap) -> Result<(), EventRecordingError> { + match metric_id { +{% for metric_id, event in events_by_id.items() %} + {{metric_id}} => { + assert!( + extra_keys_len(&super::{{event}}) != 0 || extra.is_empty(), + "No extra keys allowed, but some were passed" + ); + + super::{{event}}.record_raw(extra); + Ok(()) + } +{% endfor %} + _ => Err(EventRecordingError::InvalidId), + } + } + + /// Wrapper to record an event based on its metric ID, with a provided timestamp. + /// + /// # Arguments + /// + /// * `metric_id` - The metric's ID to look up + /// * `timestamp` - The time at which this event was recorded. + /// * `extra` - An map of (extra key id, string) pairs. + /// The map will be decoded into the appropriate `ExtraKeys` type. + /// # Returns + /// + /// Returns `Ok(())` if the event was found and `record` was called with the given `extra`, + /// or an `EventRecordingError::InvalidId` if no event by that ID exists + /// or an `EventRecordingError::InvalidExtraKey` if the event doesn't take extra pairs, + /// but some are passed in. 
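+    ///
+    /// # Example (illustrative; the metric id, timestamp and extra key id
+    /// are made up)
+    ///
+    /// ```ignore
+    /// let mut extra = HashMap::new();
+    /// extra.insert(0, "some value".to_string());
+    /// let _ = record_event_by_id_with_time(MetricId(1234), 1500, extra);
+    /// ```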
+ pub(crate) fn record_event_by_id_with_time(metric_id: MetricId, timestamp: u64, extra: HashMap) -> Result<(), EventRecordingError> { + match metric_id { +{% for metric_id, event in events_by_id.items() %} + MetricId({{metric_id}}) => { + if extra_keys_len(&super::{{event}}) == 0 && !extra.is_empty() { + return Err(EventRecordingError::InvalidExtraKey); + } + + super::{{event}}.record_with_time(timestamp, extra); + Ok(()) + } +{% endfor %} + _ => Err(EventRecordingError::InvalidId), + } + } + + /// Wrapper to record an event based on its metric ID. + /// + /// # Arguments + /// + /// * `metric_id` - The metric's ID to look up + /// * `extra` - An map of (string, string) pairs. + /// The map will be decoded into the appropriate `ExtraKeys` types. + /// # Returns + /// + /// Returns `Ok(())` if the event was found and `record` was called with the given `extra`, + /// or an `EventRecordingError::InvalidId` if no event by that ID exists + /// or an `EventRecordingError::InvalidExtraKey` if the `extra` map could not be deserialized. + pub(crate) fn record_event_by_id_with_strings(metric_id: u32, extra: HashMap) -> Result<(), EventRecordingError> { + match metric_id { +{% for metric_id, event in events_by_id.items() %} + {{metric_id}} => { + assert!( + extra_keys_len(&super::{{event}}) != 0 || extra.is_empty(), + "No extra keys allowed, but some were passed" + ); + + let extra = extra + .into_iter() + .map(|(k, v)| id_for_extra_key(&*k, &super::{{event}}).map(|k| (k, v))) + .collect::, _>>()?; + super::{{event}}.record_raw(extra); + Ok(()) + } +{% endfor %} + _ => Err(EventRecordingError::InvalidId), + } + } + + /// Wrapper to get the currently stored events for event metric. + /// + /// # Arguments + /// + /// * `metric_id` - The metric's ID to look up + /// * `ping_name` - (Optional) The ping name to look into. + /// Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// Returns the recorded events or `None` if nothing stored. + /// + /// # Panics + /// + /// Panics if no event by the given metric ID could be found. + pub(crate) fn event_test_get_value_wrapper(metric_id: u32, ping_name: Option) -> Option> { + match metric_id { +{% for metric_id, event in events_by_id.items() %} + {{metric_id}} => super::{{event}}.test_get_value(ping_name.as_deref()), +{% endfor %} + _ => panic!("No event for metric id {}", metric_id), + } + } + + /// Check the provided event for errors. + /// + /// # Arguments + /// + /// * `metric_id` - The metric's ID to look up + /// * `ping_name` - (Optional) The ping name to look into. + /// Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// Returns a string for the recorded error or `None`. + /// + /// # Panics + /// + /// Panics if no event by the given metric ID could be found. 
+ #[allow(unused_variables)] + pub(crate) fn event_test_get_error(metric_id: u32, ping_name: Option) -> Option { + #[cfg(feature = "with_gecko")] + match metric_id { +{% for metric_id, event in events_by_id.items() %} + {{metric_id}} => test_get_errors_string!(super::{{event}}, ping_name), +{% endfor %} + _ => panic!("No event for metric id {}", metric_id), + } + + #[cfg(not(feature = "with_gecko"))] + { + return None; + } + } + + pub(crate) mod submetric_maps { + use std::sync::{ + atomic::AtomicU32, + RwLock, + }; + use super::*; + + pub(crate) const MIN_LABELED_SUBMETRIC_ID: u32 = {{min_submetric_id}}; + pub(crate) static NEXT_LABELED_SUBMETRIC_ID: AtomicU32 = AtomicU32::new(MIN_LABELED_SUBMETRIC_ID); + pub(crate) static LABELED_METRICS_TO_IDS: ::glean::private::__export::Lazy>> = ::glean::private::__export::Lazy::new(|| + RwLock::new(HashMap::new()) + ); + +{% for typ, metrics in metric_by_type.items() %} +{% if typ.0 in ('BOOLEAN_MAP', 'COUNTER_MAP', 'STRING_MAP') %} + pub static {{typ.0}}: ::glean::private::__export::Lazy>> = ::glean::private::__export::Lazy::new(|| + RwLock::new(HashMap::new()) + ); +{% endif %} +{% endfor%} + } +} +{% endif %} diff --git a/third_party/python/glean_parser/glean_parser/templates/swift.jinja2 b/third_party/python/glean_parser/glean_parser/templates/swift.jinja2 new file mode 100644 index 0000000000..10e2f61001 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/templates/swift.jinja2 @@ -0,0 +1,138 @@ +// -*- mode: Swift -*- + +// AUTOGENERATED BY glean_parser v{{ parser_version }}. DO NOT EDIT. DO NOT COMMIT. +{# The rendered markdown is autogenerated, but this +Jinja2 template is not. Please file bugs! #} + +#if canImport(Foundation) + import Foundation +#endif + +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +{% macro obj_declaration(obj, suffix='', access='') %} +{{ access }}static let {{ obj.name|camelize|variable_name }}{{ suffix }} = {{ obj|type_name }}( // generated from {{ obj.identifier() }} + CommonMetricData( + {% for arg_name in common_metric_args if obj[arg_name] is defined %} + {{ arg_name|camelize }}: {{ obj[arg_name]|swift }}{{ "," if not loop.last }} + {% endfor %} + ) + {% for arg_name in extra_metric_args if obj[arg_name] is defined %} + , {{ obj[arg_name]|swift }} + {% endfor %} + ) +{% endmacro %} + +{% macro struct_decl(obj, name, suffix) %} +struct {{ obj.name|Camelize }}{{ suffix }}: EventExtras { + {% for item, typ in obj|attr(name) %} + var {{ item|camelize|variable_name }}: {{typ|extra_type_name}}? + {% endfor %} + + func toExtraRecord() -> [String: String] { + var record = [String: String]() + + {% for item in obj|attr(name) %} + if let {{ item[0]|camelize }} = self.{{item[0]|camelize}} { + record["{{item[0]}}"] = String({{ item[0]|camelize }}) + } + {% endfor %} + + return record + } + } +{% endmacro %} + +{% if not allow_reserved %} +import {{ glean_namespace }} + +{% endif %} +// swiftlint:disable superfluous_disable_command +// swiftlint:disable nesting +// swiftlint:disable line_length +// swiftlint:disable identifier_name +// swiftlint:disable force_try + +extension {{ namespace }} { + {% if build_info %} + class GleanBuild { + private init() { + // Intentionally left private, no external user can instantiate a new global object. 
+ } + + public static let info = BuildInfo(buildDate: {{ build_info.build_date }}) + } + {% endif %} + + {% for category in categories %} + {% if category.contains_pings %} + class {{ category.name|Camelize }} { + public static let shared = {{ category.name|Camelize }}() + private init() { + // Intentionally left private, no external user can instantiate a new global object. + } + + {% for obj in category.objs.values() %} + {% if obj|attr("_generate_enums") %} + {% for name, suffix in obj["_generate_enums"] %} + {% if obj|attr(name)|length %} + enum {{ obj.name|Camelize }}{{ suffix }}: Int, ReasonCodes { + {% for key in obj|attr(name) %} + case {{ key|camelize|variable_name }} = {{ loop.index-1 }} + {% endfor %} + + public func index() -> Int { + return self.rawValue + } + } + + {% endif %} + {% endfor %} + {% endif %} + /// {{ obj.description|wordwrap() | replace('\n', '\n /// ') }} + let {{ obj.name|camelize|variable_name }} = {{obj|type_name}}( + name: {{ obj.name|swift }}, + includeClientId: {{obj.include_client_id|swift}}, + sendIfEmpty: {{obj.send_if_empty|swift}}, + reasonCodes: {{obj.reason_codes|swift}} + ) + + {% endfor %} + } + + {% else %} + enum {{ category.name|Camelize }} { + {% for obj in category.objs.values() %} + {% if obj|attr("_generate_enums") %} + {% for name, suffix in obj["_generate_enums"] %} + {% if obj|attr(name)|length %} + {{ struct_decl(obj, name, suffix)|indent }} + {% endif %} + {% endfor %} + {% endif %} + {% endfor %} + {% for obj in category.objs.values() %} + {% if obj.labeled %} + {{ obj_declaration(obj, 'Label', 'private ') | indent }} + /// {{ obj.description|wordwrap() | replace('\n', '\n /// ') }} + static let {{ obj.name|camelize|variable_name }} = try! LabeledMetricType<{{ obj|type_name }}>( // generated from {{ obj.identifier() }} + category: {{ obj.category|swift }}, + name: {{ obj.name|swift }}, + sendInPings: {{ obj.send_in_pings|swift }}, + lifetime: {{ obj.lifetime|swift }}, + disabled: {{ obj.is_disabled()|swift }}, + subMetric: {{ obj.name|camelize }}Label, + labels: {{ obj.labels|swift }} + ) + + {% else %} + /// {{ obj.description|wordwrap() | replace('\n', '\n /// ') }} + {{ obj_declaration(obj) | indent }} + {% endif %} + {% endfor %} + } + + {% endif %} + {% endfor %} +} diff --git a/third_party/python/glean_parser/glean_parser/translate.py b/third_party/python/glean_parser/glean_parser/translate.py new file mode 100644 index 0000000000..ecb7515c05 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/translate.py @@ -0,0 +1,227 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +High-level interface for translating `metrics.yaml` into other formats. +""" + +from pathlib import Path +import os +import shutil +import sys +import tempfile +from typing import Any, Callable, Dict, Iterable, List, Optional + +from . import lint +from . import parser +from . import javascript +from . import kotlin +from . import markdown +from . import metrics +from . import rust +from . import swift +from . import util + + +class Outputter: + """ + Class to define an output format. + + Each outputter in the table has the following member values: + + - output_func: the main function of the outputter, the one which + does the actual translation. + + - clear_patterns: A list of glob patterns to clear in the directory before + writing new results to it. 
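For orientation, each entry in the OUTPUTTERS table further down pairs a backend's output function with the glob patterns that backend is allowed to clear. A sketch of what one such entry amounts to, using the Kotlin backend that ships with glean_parser (the variable name is illustrative only):

    from glean_parser import kotlin
    from glean_parser.translate import Outputter

    # Clears previously generated *.kt files before writing fresh ones.
    kotlin_outputter = Outputter(kotlin.output_kotlin, ["*.kt"])

This mirrors the "kotlin" entry registered in OUTPUTTERS below.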
+ """ + + def __init__( + self, + output_func: Callable[[metrics.ObjectTree, Path, Dict[str, Any]], None], + clear_patterns: Optional[List[str]] = None, + ): + if clear_patterns is None: + clear_patterns = [] + + self.output_func = output_func + self.clear_patterns = clear_patterns + + +OUTPUTTERS = { + "javascript": Outputter(javascript.output_javascript, []), + "typescript": Outputter(javascript.output_typescript, []), + "kotlin": Outputter(kotlin.output_kotlin, ["*.kt"]), + "markdown": Outputter(markdown.output_markdown, []), + "swift": Outputter(swift.output_swift, ["*.swift"]), + "rust": Outputter(rust.output_rust, []), +} + + +def transform_metrics(objects): + """ + Transform the object model from one that represents the YAML definitions + to one that reflects the type specifics needed by code generators. + + e.g. This will transform a `rate` to be a `numerator` if its denominator is + external. + """ + counters = {} + numerators_by_denominator: Dict[str, Any] = {} + for category_name, category_val in objects.items(): + if category_name == "tags": + continue + for metric in category_val.values(): + fqmn = metric.identifier() + if getattr(metric, "type", None) == "counter": + counters[fqmn] = metric + denominator_name = getattr(metric, "denominator_metric", None) + if denominator_name: + metric.type = "numerator" + numerators_by_denominator.setdefault(denominator_name, []) + numerators_by_denominator[denominator_name].append(metric) + + for denominator_name, numerators in numerators_by_denominator.items(): + if denominator_name not in counters: + raise ValueError( + f"No `counter` named {denominator_name} found to be used as" + "denominator for {numerators}", + file=sys.stderr, + ) + counters[denominator_name].__class__ = metrics.Denominator + counters[denominator_name].type = "denominator" + counters[denominator_name].numerators = numerators + + +def translate_metrics( + input_filepaths: Iterable[Path], + output_dir: Path, + translation_func: Callable[[metrics.ObjectTree, Path, Dict[str, Any]], None], + clear_patterns: Optional[List[str]] = None, + options: Optional[Dict[str, Any]] = None, + parser_config: Optional[Dict[str, Any]] = None, +): + """ + Translate the files in `input_filepaths` by running the metrics through a + translation function and writing the results in `output_dir`. + + :param input_filepaths: list of paths to input metrics.yaml files + :param output_dir: the path to the output directory + :param translation_func: the function that actually performs the translation. + It is passed the following arguments: + + - metrics_objects: The tree of metrics as pings as returned by + `parser.parse_objects`. + - output_dir: The path to the output directory. + - options: A dictionary of output format-specific options. + + Examples of translation functions are in `kotlin.py` and `swift.py`. + :param clear_patterns: a list of glob patterns of files to clear before + generating the output files. By default, no files will be cleared (i.e. + the directory should be left alone). + :param options: dictionary of options. The available options are backend + format specific. These are passed unchanged to `translation_func`. + :param parser_config: A dictionary of options that change parsing behavior. + See `parser.parse_metrics` for more info. 
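A minimal invocation sketch for the function documented above, assuming the package is installed; the metrics file and output directory paths are hypothetical, and the Rust backend registered in OUTPUTTERS is used as the translation function:

    from pathlib import Path

    from glean_parser import rust
    from glean_parser.translate import translate_metrics

    result = translate_metrics(
        [Path("metrics.yaml")],   # hypothetical input file
        Path("generated/"),       # hypothetical output directory
        rust.output_rust,
        clear_patterns=[],
        options={},
    )
    # result is 0 on success, 1 if linting or schema validation failed.

In practice most callers go through the higher-level translate() defined below, which looks up the output function and clear patterns from OUTPUTTERS by format name.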
+ """ + if clear_patterns is None: + clear_patterns = [] + + if options is None: + options = {} + + if parser_config is None: + parser_config = {} + + input_filepaths = util.ensure_list(input_filepaths) + + allow_missing_files = parser_config.get("allow_missing_files", False) + if not input_filepaths and not allow_missing_files: + print("❌ No metric files specified. ", end="") + print("Use `--allow-missing-files` to not treat this as an error.") + return 1 + + if lint.glinter(input_filepaths, parser_config): + return 1 + + all_objects = parser.parse_objects(input_filepaths, parser_config) + + if util.report_validation_errors(all_objects): + return 1 + + # allow_reserved is also relevant to the translators, so copy it there + if parser_config.get("allow_reserved"): + options["allow_reserved"] = True + + # We don't render tags anywhere yet. + all_objects.value.pop("tags", None) + + # Apply additional general transformations to all metrics + transform_metrics(all_objects.value) + + # Write everything out to a temporary directory, and then move it to the + # real directory, for transactional integrity. + with tempfile.TemporaryDirectory() as tempdir: + tempdir_path = Path(tempdir) + translation_func(all_objects.value, tempdir_path, options) + + if output_dir.is_file(): + output_dir.unlink() + elif output_dir.is_dir() and len(clear_patterns): + for clear_pattern in clear_patterns: + for filepath in output_dir.glob(clear_pattern): + filepath.unlink() + if len(list(output_dir.iterdir())): + print(f"Extra contents found in '{output_dir}'.") + + # We can't use shutil.copytree alone if the directory already exists. + # However, if it doesn't exist, make sure to create one otherwise + # shutil.copy will fail. + os.makedirs(str(output_dir), exist_ok=True) + for filename in tempdir_path.glob("*"): + shutil.copy(str(filename), str(output_dir)) + + return 0 + + +def translate( + input_filepaths: Iterable[Path], + output_format: str, + output_dir: Path, + options: Optional[Dict[str, Any]] = None, + parser_config: Optional[Dict[str, Any]] = None, +): + """ + Translate the files in `input_filepaths` to the given `output_format` and + put the results in `output_dir`. + + :param input_filepaths: list of paths to input metrics.yaml files + :param output_format: the name of the output format + :param output_dir: the path to the output directory + :param options: dictionary of options. The available options are backend + format specific. + :param parser_config: A dictionary of options that change parsing behavior. + See `parser.parse_metrics` for more info. + """ + if options is None: + options = {} + + if parser_config is None: + parser_config = {} + + format_desc = OUTPUTTERS.get(output_format, None) + + if format_desc is None: + raise ValueError(f"Unknown output format '{output_format}'") + + return translate_metrics( + input_filepaths, + output_dir, + format_desc.output_func, + format_desc.clear_patterns, + options, + parser_config, + ) diff --git a/third_party/python/glean_parser/glean_parser/translation_options.py b/third_party/python/glean_parser/glean_parser/translation_options.py new file mode 100644 index 0000000000..48774fee3c --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/translation_options.py @@ -0,0 +1,54 @@ +import pydoc + + +def translate_options(ctx, param, value): + text = """Target language options for Translate function + +These are backend specific and optional, provide as key:value + +Rust: no options. 
+ +Swift: +- `namespace`: The namespace to generate metrics in +- `glean_namespace`: The namespace to import Glean from +- `allow_reserved`: When True, this is a Glean-internal build +- `with_buildinfo`: If "true" the `GleanBuildInfo` is generated. + Otherwise generation of that file is skipped. + Defaults to "true". +- `build_date`: If set to `0` a static unix epoch time will be used. + If set to a ISO8601 datetime string (e.g. `2022-01-03T17:30:00`) + it will use that date. + Other values will throw an error. + If not set it will use the current date & time. + +Kotlin: +- `namespace`: The package namespace to declare at the top of the + generated files. Defaults to `GleanMetrics`. +- `glean_namespace`: The package namespace of the glean library itself. + This is where glean objects will be imported from in the generated + code. + +JavaScript: +- `platform`: Which platform are we building for. Options are `webext` and `qt`. + Default is `webext`. +- `version`: The version of the Glean.js Qt library being used. + This option is mandatory when targeting Qt. Note that the version + string must only contain the major and minor version i.e. 0.14. +- `with_buildinfo`: If "true" a `gleanBuildInfo.(js|ts)` file is generated. + Otherwise generation of that file is skipped. Defaults to "false". +- `build_date`: If set to `0` a static unix epoch time will be used. + If set to a ISO8601 datetime string (e.g. `2022-01-03T17:30:00`) + it will use that date. + Other values will throw an error. + If not set it will use the current date & time. + +Markdown: +- `project_title`: The project's title. + +(press q to exit)""" + + if value: + if value[0].lower() == "help": + pydoc.pager(text) + ctx.exit() + return value diff --git a/third_party/python/glean_parser/glean_parser/util.py b/third_party/python/glean_parser/glean_parser/util.py new file mode 100644 index 0000000000..3b8b24cd78 --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/util.py @@ -0,0 +1,560 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from collections import OrderedDict +import datetime +import functools +import json +from pathlib import Path +import sys +import textwrap +from typing import Any, Callable, Iterable, Sequence, Tuple, Union, Optional +import urllib.request + +import appdirs # type: ignore +import diskcache # type: ignore +import jinja2 +import jsonschema # type: ignore +from jsonschema import _utils # type: ignore +import yaml + +if sys.version_info < (3, 7): + import iso8601 # type: ignore + + def date_fromisoformat(datestr: str) -> datetime.date: + try: + return iso8601.parse_date(datestr).date() + except iso8601.ParseError: + raise ValueError() + + def datetime_fromisoformat(datestr: str) -> datetime.datetime: + try: + return iso8601.parse_date(datestr) + except iso8601.ParseError: + raise ValueError() + +else: + + def date_fromisoformat(datestr: str) -> datetime.date: + return datetime.date.fromisoformat(datestr) + + def datetime_fromisoformat(datestr: str) -> datetime.datetime: + return datetime.datetime.fromisoformat(datestr) + + +TESTING_MODE = "pytest" in sys.modules + + +JSONType = Union[list, dict, str, int, float, None] +""" +The types supported by JSON. + +This is only an approximation -- this should really be a recursive type. 
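If one wanted the recursive form the docstring above alludes to, a closer spelling would use string forward references. This is only a sketch under a different name, not what the module defines; many older static type checkers do not fully resolve recursive aliases, which is presumably why the flat approximation is kept:

    from typing import Dict, List, Union

    JSONValue = Union[
        None, bool, int, float, str, List["JSONValue"], Dict[str, "JSONValue"]
    ]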
+""" + +# Adapted from +# https://stackoverflow.com/questions/34667108/ignore-dates-and-times-while-parsing-yaml + + +# A wrapper around OrderedDict for Python < 3.7 (where dict ordering is not +# maintained by default), and regular dict everywhere else. +if sys.version_info < (3, 7): + + class DictWrapper(OrderedDict): + pass + +else: + + class DictWrapper(dict): + pass + + +class _NoDatesSafeLoader(yaml.SafeLoader): + @classmethod + def remove_implicit_resolver(cls, tag_to_remove): + """ + Remove implicit resolvers for a particular tag + + Takes care not to modify resolvers in super classes. + + We want to load datetimes as strings, not dates, because we + go on to serialise as json which doesn't have the advanced types + of yaml, and leads to incompatibilities down the track. + """ + if "yaml_implicit_resolvers" not in cls.__dict__: + cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() + + for first_letter, mappings in cls.yaml_implicit_resolvers.items(): + cls.yaml_implicit_resolvers[first_letter] = [ + (tag, regexp) for tag, regexp in mappings if tag != tag_to_remove + ] + + +# Since we use JSON schema to validate, and JSON schema doesn't support +# datetimes, we don't want the YAML loader to give us datetimes -- just +# strings. +_NoDatesSafeLoader.remove_implicit_resolver("tag:yaml.org,2002:timestamp") + + +def yaml_load(stream): + """ + Map line number to yaml nodes, and preserve the order + of metrics as they appear in the metrics.yaml file. + """ + + class SafeLineLoader(_NoDatesSafeLoader): + pass + + def _construct_mapping_adding_line(loader, node): + loader.flatten_mapping(node) + mapping = DictWrapper(loader.construct_pairs(node)) + mapping.defined_in = {"line": node.start_mark.line} + return mapping + + SafeLineLoader.add_constructor( + yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _construct_mapping_adding_line + ) + return yaml.load(stream, SafeLineLoader) + + +def ordered_yaml_dump(data, **kwargs): + class OrderedDumper(yaml.Dumper): + pass + + def _dict_representer(dumper, data): + return dumper.represent_mapping( + yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items() + ) + + OrderedDumper.add_representer(DictWrapper, _dict_representer) + return yaml.dump(data, Dumper=OrderedDumper, **kwargs) + + +def load_yaml_or_json(path: Path): + """ + Load the content from either a .json or .yaml file, based on the filename + extension. + + :param path: `pathlib.Path` object + :rtype object: The tree of objects as a result of parsing the file. + :raises ValueError: The file is neither a .json, .yml or .yaml file. + :raises FileNotFoundError: The file does not exist. + """ + # If in py.test, support bits of literal JSON/YAML content + if TESTING_MODE and isinstance(path, dict): + return yaml_load(yaml.dump(path)) + + if path.suffix == ".json": + with path.open("r", encoding="utf-8") as fd: + return json.load(fd) + elif path.suffix in (".yml", ".yaml", ".yamlx"): + with path.open("r", encoding="utf-8") as fd: + return yaml_load(fd) + else: + raise ValueError(f"Unknown file extension {path.suffix}") + + +def ensure_list(value: Any) -> Sequence[Any]: + """ + Ensures that the value is a list. If it is anything but a list or tuple, a + list with a single element containing only value is returned. + """ + if not isinstance(value, (list, tuple)): + return [value] + return value + + +def to_camel_case(input: str, capitalize_first_letter: bool) -> str: + """ + Convert the value to camelCase. + + This additionally replaces any '.' with '_'. 
The first letter is capitalized + depending on `capitalize_first_letter`. + """ + sanitized_input = input.replace(".", "_").replace("-", "_") + # Filter out any empty token. This could happen due to leading '_' or + # consecutive '__'. + tokens = [s.capitalize() for s in sanitized_input.split("_") if len(s) != 0] + # If we're not meant to capitalize the first letter, then lowercase it. + if not capitalize_first_letter: + tokens[0] = tokens[0].lower() + # Finally join the tokens and capitalize. + return "".join(tokens) + + +def camelize(value: str) -> str: + """ + Convert the value to camelCase (with a lower case first letter). + + This is a thin wrapper around inflection.camelize that handles dots in + addition to underscores. + """ + return to_camel_case(value, False) + + +def Camelize(value: str) -> str: + """ + Convert the value to CamelCase (with an upper case first letter). + + This is a thin wrapper around inflection.camelize that handles dots in + addition to underscores. + """ + return to_camel_case(value, True) + + +def snake_case(value: str) -> str: + """ + Convert the value to snake_case. + """ + return value.lower().replace(".", "_").replace("-", "_") + + +def screaming_case(value: str) -> str: + """ + Convert the value to SCREAMING_SNAKE_CASE. + """ + return value.upper().replace(".", "_").replace("-", "_") + + +@functools.lru_cache() +def get_jinja2_template( + template_name: str, filters: Iterable[Tuple[str, Callable]] = () +): + """ + Get a Jinja2 template that ships with glean_parser. + + The template has extra filters for camel-casing identifiers. + + :param template_name: Name of a file in ``glean_parser/templates`` + :param filters: tuple of 2-tuple. A tuple of (name, func) pairs defining + additional filters. + """ + env = jinja2.Environment( + loader=jinja2.PackageLoader("glean_parser", "templates"), + trim_blocks=True, + lstrip_blocks=True, + ) + + env.filters["camelize"] = camelize + env.filters["Camelize"] = Camelize + env.filters["scream"] = screaming_case + for filter_name, filter_func in filters: + env.filters[filter_name] = filter_func + + return env.get_template(template_name) + + +def keep_value(f): + """ + Wrap a generator so the value it returns (rather than yields), will be + accessible on the .value attribute when the generator is exhausted. + """ + + class ValueKeepingGenerator(object): + def __init__(self, g): + self.g = g + self.value = None + + def __iter__(self): + self.value = yield from self.g + + @functools.wraps(f) + def g(*args, **kwargs): + return ValueKeepingGenerator(f(*args, **kwargs)) + + return g + + +def get_null_resolver(schema): + """ + Returns a JSON Pointer resolver that does nothing. + + This lets us handle the moz: URLs in our schemas. + """ + + class NullResolver(jsonschema.RefResolver): + def resolve_remote(self, uri): + if uri in self.store: + return self.store[uri] + if uri == "": + return self.referrer + + return NullResolver.from_schema(schema) + + +def fetch_remote_url(url: str, cache: bool = True): + """ + Fetches the contents from an HTTP url or local file path, and optionally + caches it to disk. + """ + # Include the Python version in the cache key, since caches aren't + # sharable across Python versions. 
+ key = (url, str(sys.version_info)) + + is_http = url.startswith("http") + + if not is_http: + with open(url, "r", encoding="utf-8") as fd: + return fd.read() + + if cache: + cache_dir = appdirs.user_cache_dir("glean_parser", "mozilla") + with diskcache.Cache(cache_dir) as dc: + if key in dc: + return dc[key] + + contents: str = urllib.request.urlopen(url).read() + + if cache: + with diskcache.Cache(cache_dir) as dc: + dc[key] = contents + + return contents + + +_unset = _utils.Unset() + + +def pprint_validation_error(error) -> str: + """ + A version of jsonschema's ValidationError __str__ method that doesn't + include the schema fragment that failed. This makes the error messages + much more succinct. + + It also shows any subschemas of anyOf/allOf that failed, if any (what + jsonschema calls "context"). + """ + essential_for_verbose = ( + error.validator, + error.validator_value, + error.instance, + error.schema, + ) + if any(m is _unset for m in essential_for_verbose): + return textwrap.fill(error.message) + + instance = error.instance + for path in list(error.relative_path)[::-1]: + if isinstance(path, str): + instance = {path: instance} + else: + instance = [instance] + + yaml_instance = ordered_yaml_dump(instance, width=72, default_flow_style=False) + + parts = ["```", yaml_instance.rstrip(), "```", "", textwrap.fill(error.message)] + if error.context: + parts.extend( + textwrap.fill(x.message, initial_indent=" ", subsequent_indent=" ") + for x in error.context + ) + + description = error.schema.get("description") + if description: + parts.extend( + ["", "Documentation for this node:", textwrap.indent(description, " ")] + ) + + return "\n".join(parts) + + +def format_error( + filepath: Union[str, Path], + header: str, + content: str, + lineno: Optional[int] = None, +) -> str: + """ + Format a jsonshema validation error. + """ + if isinstance(filepath, Path): + filepath = filepath.resolve() + else: + filepath = "" + if lineno: + filepath = f"{filepath}:{lineno}" + if header: + return f"{filepath}: {header}\n{textwrap.indent(content, ' ')}" + else: + return f"{filepath}:\n{textwrap.indent(content, ' ')}" + + +def parse_expiration_date(expires: str) -> datetime.date: + """ + Parses the expired field date (yyyy-mm-dd) as a date. + Raises a ValueError in case the string is not properly formatted. + """ + try: + return date_fromisoformat(expires) + except (TypeError, ValueError): + raise ValueError( + f"Invalid expiration date '{expires}'. " + "Must be of the form yyyy-mm-dd in UTC." + ) + + +def parse_expiration_version(expires: str) -> int: + """ + Parses the expired field version string as an integer. + Raises a ValueError in case the string does not contain a valid + positive integer. + """ + try: + if isinstance(expires, int): + version_number = int(expires) + if version_number > 0: + return version_number + # Fall-through: if it's not an integer or is not greater than zero, + # raise an error. + raise ValueError() + except ValueError: + raise ValueError( + f"Invalid expiration version '{expires}'. Must be a positive integer." + ) + + +def is_expired(expires: str, major_version: Optional[int] = None) -> bool: + """ + Parses the `expires` field in a metric or ping and returns whether + the object should be considered expired. 
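Expected behaviour, going by the branches in the implementation that follows (the version numbers and dates here are made up for illustration):

    is_expired("never")                   # always False
    is_expired("expired")                 # always True
    is_expired(100, major_version=115)    # True: 100 <= 115
    is_expired(200, major_version=115)    # False: 200 > 115
    is_expired("2000-01-01")              # True: the date is in the past

Note that the version form is an integer as handed over by the YAML loader; despite the `str` annotation, parse_expiration_version rejects string values.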
+ """ + if expires == "never": + return False + elif expires == "expired": + return True + elif major_version is not None: + return parse_expiration_version(expires) <= major_version + else: + date = parse_expiration_date(expires) + return date <= datetime.datetime.utcnow().date() + + +def validate_expires(expires: str, major_version: Optional[int] = None) -> None: + """ + If expiration by major version is enabled, raises a ValueError in + case `expires` is not a positive integer. + Otherwise raises a ValueError in case the `expires` is not ISO8601 + parseable, or in case the date is more than 730 days (~2 years) in + the future. + """ + if expires in ("never", "expired"): + return + + if major_version is not None: + parse_expiration_version(expires) + # Don't need to keep parsing dates if expiration by version + # is enabled. We don't allow mixing dates and versions for a + # single product. + return + + date = parse_expiration_date(expires) + max_date = datetime.datetime.now() + datetime.timedelta(days=730) + if date > max_date.date(): + raise ValueError( + f"'{expires}' is more than 730 days (~2 years) in the future.", + "Please make sure this is intentional.", + "You can supress this warning by adding EXPIRATION_DATE_TOO_FAR to no_lint", + "See: https://mozilla.github.io/glean_parser/metrics-yaml.html#no_lint", + ) + + +def build_date(date: Optional[str]) -> datetime.datetime: + """ + Generate the build timestamp. + + If `date` is set to `0` a static unix epoch time will be used. + If `date` it is set to a ISO8601 datetime string (e.g. `2022-01-03T17:30:00`) + it will use that date. + Note that any timezone offset will be ignored and UTC will be used. + Otherwise it will throw an error. + + If `date` is `None` it will use the current date & time. + """ + + if date is not None: + date = str(date) + if date == "0": + ts = datetime.datetime(1970, 1, 1, 0, 0, 0) + else: + ts = datetime_fromisoformat(date).replace(tzinfo=datetime.timezone.utc) + else: + ts = datetime.datetime.utcnow() + + return ts + + +def report_validation_errors(all_objects): + """ + Report any validation errors found to the console. + + Returns the number of errors reported. + """ + found_errors = 0 + for error in all_objects: + found_errors += 1 + print("=" * 78, file=sys.stderr) + print(error, file=sys.stderr) + return found_errors + + +def remove_output_params(d, output_params): + """ + Remove output-only params, such as "defined_in", + in order to validate the output against the input schema. + """ + modified_dict = {} + for key, value in d.items(): + if key is not output_params: + modified_dict[key] = value + return modified_dict + + +# Names of parameters to pass to all metrics constructors constructors. +common_metric_args = [ + "category", + "name", + "send_in_pings", + "lifetime", + "disabled", +] + + +# Names of parameters that only apply to some of the metrics types. +# **CAUTION**: This list needs to be in the order the Swift & Rust type constructors +# expects them. (The other language bindings don't care about the order). +extra_metric_args = [ + "time_unit", + "memory_unit", + "allowed_extra_keys", + "reason_codes", + "range_min", + "range_max", + "bucket_count", + "histogram_type", + "numerators", +] + + +# This includes only things that the language bindings care about, not things +# that are metadata-only or are resolved into other parameters at parse time. +# **CAUTION**: This list needs to be in the order the Swift & Rust type constructors +# expects them. 
(The other language bindings don't care about the order). The +# `test_order_of_fields` test checks that the generated code is valid. +# **DO NOT CHANGE THE ORDER OR ADD NEW FIELDS IN THE MIDDLE** +metric_args = common_metric_args + extra_metric_args + + +# Names of ping parameters to pass to constructors. +ping_args = [ + "name", + "include_client_id", + "send_if_empty", + "reason_codes", +] + + +# Names of parameters to pass to both metric and ping constructors (no duplicates). +extra_args = metric_args + [v for v in ping_args if v not in metric_args] diff --git a/third_party/python/glean_parser/glean_parser/validate_ping.py b/third_party/python/glean_parser/glean_parser/validate_ping.py new file mode 100644 index 0000000000..33598149eb --- /dev/null +++ b/third_party/python/glean_parser/glean_parser/validate_ping.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Validates the contents of a Glean ping against the schema. +""" + +import functools +import io +import json +from pathlib import Path +import sys + +import jsonschema # type: ignore + +from . import util + + +ROOT_DIR = Path(__file__).parent +SCHEMAS_DIR = ROOT_DIR / "schemas" + + +@functools.lru_cache(maxsize=1) +def _get_ping_schema(schema_url): + contents = util.fetch_remote_url(schema_url) + return json.loads(contents) + + +def _validate_ping(ins, outs, schema_url): + schema = _get_ping_schema(schema_url) + + resolver = util.get_null_resolver(schema) + + document = json.load(ins) + + validator_class = jsonschema.validators.validator_for(schema) + validator = validator_class(schema, resolver=resolver) + + has_error = 0 + for error in validator.iter_errors(document): + outs.write("=" * 76) + outs.write("\n") + outs.write(util.format_error("", "", util.pprint_validation_error(error))) + outs.write("\n") + has_error = 1 + + return has_error + + +def validate_ping(ins, outs=None, schema_url=None): + """ + Validates the contents of a Glean ping. + + :param ins: Input stream or file path to the ping contents to validate + :param outs: Output stream to write errors to. (Defaults to stdout) + :param schema_url: HTTP URL or local filesystem path to Glean ping schema. + Defaults to the current version of the schema in + mozilla-pipeline-schemas. + :rtype: int 1 if any errors occurred, otherwise 0. + """ + if schema_url is None: + raise TypeError("Missing required argument 'schema_url'") + + if outs is None: + outs = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8") + + if isinstance(ins, (str, bytes, Path)): + with open(ins, "r", encoding="utf-8") as fd: + return _validate_ping(fd, outs, schema_url=schema_url) + else: + return _validate_ping(ins, outs, schema_url=schema_url) -- cgit v1.2.3
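The validator above is the piece most likely to be driven directly from scripts. A minimal sketch of doing so; the ping file path and schema URL are placeholders, not values shipped with this package:

    import sys

    from glean_parser.validate_ping import validate_ping

    # Returns 1 if the ping document violates the schema, 0 otherwise.
    exit_code = validate_ping(
        "ping.json",   # hypothetical ping dump
        sys.stderr,    # write errors to stderr instead of stdout
        schema_url="https://example.org/glean.1.schema.json",  # placeholder URL
    )
    sys.exit(exit_code)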