487 lines
18 KiB
Python
487 lines
18 KiB
Python
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
import atexit
|
|
import re
|
|
|
|
import yaml
|
|
|
|
from . import shared_telemetry_utils as utils
|
|
from .shared_telemetry_utils import ParserError
|
|
|
|
# Register ParserError.exit_func to run when the interpreter shuts down.
# NOTE(review): presumably this reports the errors queued through
# ParserError.handle_later() -- confirm in shared_telemetry_utils.
atexit.register(ParserError.exit_func)
|
|
|
|
# Base URL of the scalar documentation. Error messages in this module append
# an anchor (e.g. "#required-fields") to point at the relevant section.
BASE_DOC_URL = (
    "https://firefox-source-docs.mozilla.org/toolkit/components/"
    "telemetry/telemetry/collection/scalars.html"
)

# The map containing the allowed scalar kinds and their mapping to the
# nsITelemetry::SCALAR_TYPE_* type constants.
SCALAR_TYPES_MAP = {
    "uint": "nsITelemetry::SCALAR_TYPE_COUNT",
    "string": "nsITelemetry::SCALAR_TYPE_STRING",
    "boolean": "nsITelemetry::SCALAR_TYPE_BOOLEAN",
}
|
|
|
|
|
|
class ScalarType:
    """A class for representing a scalar definition.

    An instance wraps one probe entry of the scalar YAML file: the category
    and probe names plus the dictionary of properties. The constructor
    validates the entry and the properties expose the individual fields.

    Validation problems are reported with ``ParserError(...).handle_later()``.
    NOTE(review): this class assumes ``handle_later()`` records the error and
    returns, so that several problems can be reported in one run -- confirm in
    shared_telemetry_utils. Because of that, every check below must tolerate
    input that an earlier check has already flagged as malformed.
    """

    def __init__(self, category_name, probe_name, definition, strict_type_checks):
        """Validate and store a scalar definition.

        :param category_name: the name of the category the probe is in.
        :param probe_name: the name of the scalar probe.
        :param definition: the dictionary containing the scalar properties.
        :param strict_type_checks: when false, the type and value validation
               of the definition is skipped (names are still validated).
        :raises KeyError: if the definition has no "expires" entry.
        """
        # Validate and set the name, so we don't need to pass it to the other
        # validation functions.
        self._strict_type_checks = strict_type_checks
        self.validate_names(category_name, probe_name)
        self._name = probe_name
        self._category_name = category_name

        # Validating the scalar definition.
        self.validate_types(definition)
        self.validate_values(definition)

        # Everything is ok, set the rest of the data.
        self._definition = definition
        self._expires = utils.add_expiration_postfix(definition["expires"])

    def validate_names(self, category_name, probe_name):
        """Validate the category and probe name:
            - Category name must be alpha-numeric + '.', no leading/trailing digit or '.'.
            - Probe name must be alpha-numeric + '_', no leading/trailing digit or '_'.

        :param category_name: the name of the category the probe is in.
        :param probe_name: the name of the scalar probe.
        :raises ParserError: if the length of the names exceeds the limit or they don't
                conform our name specification.
        """

        # Enforce a maximum length on category and probe names.
        MAX_NAME_LENGTH = 40
        for n in (category_name, probe_name):
            if len(n) > MAX_NAME_LENGTH:
                ParserError(
                    f"Name '{n}' exceeds maximum name length of {MAX_NAME_LENGTH} characters.\n"
                    f"See: {BASE_DOC_URL}#the-yaml-definition-file"
                ).handle_later()

        def check_name(name, error_msg_prefix, allowed_char_regexp):
            # Check if we only have the allowed characters. fullmatch() is used
            # instead of a '$' anchor because '$' also matches just before a
            # trailing newline, which would let "name\n" through.
            chars_regxp = r"[a-zA-Z0-9" + allowed_char_regexp + r"]+"
            if not re.fullmatch(chars_regxp, name):
                ParserError(
                    f"{error_msg_prefix} name must be alpha-numeric. Got: '{name}'.\n"
                    f"See: {BASE_DOC_URL}#the-yaml-definition-file"
                ).handle_later()

            # Don't allow leading/trailing digits, '.' or '_'.
            if re.search(r"(^[\d\._])|([\d\._])$", name):
                ParserError(
                    f"{error_msg_prefix} name must not have a leading/trailing "
                    f"digit, a dot or underscore. Got: '{name}'.\n"
                    f" See: {BASE_DOC_URL}#the-yaml-definition-file"
                ).handle_later()

        check_name(category_name, "Category", r"\.")
        check_name(probe_name, "Probe", r"_")

    def validate_types(self, definition):
        """This function performs some basic sanity checks on the scalar definition:
            - Checks that all the required fields are available.
            - Checks that all the fields have the expected types.

        Does nothing when strict type checks are disabled.

        :param definition: the dictionary containing the scalar properties.
        :raises ParserError: if a scalar definition field is of the wrong type.
        :raises ParserError: if a required field is missing or unknown fields are present.
        """

        if not self._strict_type_checks:
            return

        def validate_notification_email(notification_email):
            # Perform simple email validation to make sure it doesn't contain spaces or commas.
            return not any(c in notification_email for c in [",", " "])

        # The required and optional fields in a scalar type definition.
        REQUIRED_FIELDS = {
            "bug_numbers": list,  # This contains ints. See LIST_FIELDS_CONTENT.
            "description": str,
            "expires": str,
            "kind": str,
            "notification_emails": list,  # This contains strings. See LIST_FIELDS_CONTENT.
            "record_in_processes": list,
            "products": list,
        }

        OPTIONAL_FIELDS = {
            "release_channel_collection": str,
            "keyed": bool,
            "keys": list,
            "operating_systems": list,
            "record_into_store": list,
        }

        # The types for the data within the fields that hold lists.
        LIST_FIELDS_CONTENT = {
            "bug_numbers": int,
            "notification_emails": str,
            "record_in_processes": str,
            "products": str,
            "keys": str,
            "operating_systems": str,
            "record_into_store": str,
        }

        # Concatenate the required and optional field definitions.
        ALL_FIELDS = {**REQUIRED_FIELDS, **OPTIONAL_FIELDS}

        # Checks that all the required fields are available.
        missing_fields = [f for f in REQUIRED_FIELDS if f not in definition]
        if missing_fields:
            missing = ", ".join(missing_fields)
            ParserError(
                f"{self._name} - missing required fields: {missing}"
                f".\nSee: {BASE_DOC_URL}#required-fields"
            ).handle_later()

        # Do we have any unknown field?
        unknown_fields = [f for f in definition if f not in ALL_FIELDS]
        if unknown_fields:
            # str() each name: a YAML key is not guaranteed to be a string.
            unknown = ", ".join(str(f) for f in unknown_fields)
            ParserError(
                f"{self._name} - unknown fields: {unknown}"
                f".\nSee: {BASE_DOC_URL}#required-fields"
            ).handle_later()

        # Checks the type for all the known fields. Unknown fields were
        # reported above and have no expected type to look up.
        wrong_type_names = [
            f"{f} must be {str(ALL_FIELDS[f])}"
            for f in definition
            if f in ALL_FIELDS and not isinstance(definition[f], ALL_FIELDS[f])
        ]
        if wrong_type_names:
            wrong = ", ".join(wrong_type_names)
            ParserError(
                f"{self._name} - {wrong}.\nSee: {BASE_DOC_URL}#required-fields"
            ).handle_later()

        # Check that the email addresses don't contain spaces or commas. A
        # missing or mistyped field was reported above; non-string entries are
        # reported by the list content check below.
        notification_emails = definition.get("notification_emails")
        if isinstance(notification_emails, list):
            for notification_email in notification_emails:
                if not isinstance(notification_email, str):
                    continue
                if not validate_notification_email(notification_email):
                    ParserError(
                        f"{self._name} - invalid email address: {notification_email}"
                        f".\nSee: {BASE_DOC_URL}"
                    ).handle_later()

        # Check that the lists are not empty and that data in the lists
        # have the correct types. Only fields that are supposed to hold a
        # list are considered.
        list_fields = [
            f
            for f in definition
            if f in LIST_FIELDS_CONTENT and isinstance(definition[f], list)
        ]
        for field in list_fields:
            # Check for empty lists.
            if not definition[field]:
                ParserError(
                    f"Field '{field}' for probe '{self._name}' must not be empty"
                    f".\nSee: {BASE_DOC_URL}#required-fields"
                ).handle_later()
            # Check the type of the list content.
            content_type = LIST_FIELDS_CONTENT[field]
            if any(not isinstance(v, content_type) for v in definition[field]):
                ParserError(
                    f"Field '{field}' for probe '{self._name}' must only contain values of type {str(content_type)}"
                    f".\nSee: {BASE_DOC_URL}#the-yaml-definition-file"
                ).handle_later()

        # Check that keys are only added to keyed scalars and that their values are valid
        MAX_KEY_COUNT = 100
        MAX_KEY_LENGTH = 72
        keys = definition.get("keys")
        if keys is not None:
            if not definition.get("keyed", False):
                ParserError(
                    f"{self._name} - invalid field: "
                    "\n`keys` field only valid for keyed scalars"
                ).handle_later()

            # A non-list `keys` was already reported by the type check.
            if isinstance(keys, list):
                if len(keys) > MAX_KEY_COUNT:
                    ParserError(
                        f"{self._name} - exceeding key count: "
                        f"\n`keys` values count must not exceed {MAX_KEY_COUNT}"
                    ).handle_later()

                # Non-string keys were already reported by the content check.
                invalid = [
                    k for k in keys if isinstance(k, str) and len(k) > MAX_KEY_LENGTH
                ]
                if invalid:
                    ParserError(
                        f"{self._name} - invalid key value"
                        f"\n `keys` values are exceeding length {MAX_KEY_LENGTH}:"
                        + ", ".join(invalid)
                    ).handle_later()

    def validate_values(self, definition):
        """This function checks that the fields have the correct values.

        Does nothing when strict type checks are disabled.

        :param definition: the dictionary containing the scalar properties.
        :raises ParserError: if a scalar definition field contains an unexpected value.
        """

        if not self._strict_type_checks:
            return

        def list_field(field):
            # Return the list stored in `field`, or [] when the field is absent
            # or not a list (validate_types reports the wrong type).
            value = definition.get(field, [])
            return value if isinstance(value, list) else []

        # Validate the scalar kind.
        scalar_kind = definition.get("kind")
        if not isinstance(scalar_kind, str) or scalar_kind not in SCALAR_TYPES_MAP:
            ParserError(
                f"{self._name} - unknown scalar kind: {scalar_kind}"
                f".\nSee: {BASE_DOC_URL}"
            ).handle_later()

        # Validate the collection policy.
        collection_policy = definition.get("release_channel_collection", None)
        if collection_policy and collection_policy not in ["opt-in", "opt-out"]:
            ParserError(
                f"{self._name} - unknown collection policy: {collection_policy}"
                f".\nSee: {BASE_DOC_URL}#optional-fields"
            ).handle_later()

        # Validate operating_systems.
        if "operating_systems" in definition:
            ParserError(
                f"{self._name} - uses obsolete field 'operating_systems'."
            ).handle_later()
        for operating_system in list_field("operating_systems"):
            if not isinstance(operating_system, str) or not utils.is_valid_os(
                operating_system
            ):
                ParserError(
                    f"{self._name} - invalid entry in operating_systems: {operating_system}"
                    f".\nSee: {BASE_DOC_URL}#optional-fields"
                ).handle_later()

        # Validate record_in_processes.
        for proc in list_field("record_in_processes"):
            if not isinstance(proc, str) or not utils.is_valid_process_name(proc):
                ParserError(
                    f"{self._name} - unknown value in record_in_processes: {proc}"
                    f".\nSee: {BASE_DOC_URL}"
                ).handle_later()

        # Validate product.
        for product in list_field("products"):
            if not isinstance(product, str) or not utils.is_valid_product(product):
                ParserError(
                    f"{self._name} - unknown value in products: {product}"
                    f".\nSee: {BASE_DOC_URL}"
                ).handle_later()

        # Validate the expiration version.
        # Historical versions of Scalars.json may contain expiration versions
        # using the deprecated format 'N.Na1'. Those scripts set
        # self._strict_type_checks to false, so they never reach this point.
        # A missing or non-string value was already reported by validate_types.
        expires = definition.get("expires")
        if isinstance(expires, str) and not utils.validate_expiration_version(expires):
            ParserError(
                f"{self._name} - invalid expires: {expires}.\nSee: {BASE_DOC_URL}#required-fields"
            ).handle_later()

    @property
    def category(self):
        """Get the category name"""
        return self._category_name

    @property
    def name(self):
        """Get the scalar name"""
        return self._name

    @property
    def label(self):
        """Get the scalar label generated from the scalar and category names."""
        return self._category_name + "." + self._name

    @property
    def enum_label(self):
        """Get the enum label generated from the scalar and category names. This is used to
        generate the enum tables."""

        # The scalar name can contain informations about its hierarchy (e.g. 'a.b.scalar').
        # We can't have dots in C++ enums, replace them with an underscore. Also, make the
        # label upper case for consistency with the histogram enums.
        return self.label.replace(".", "_").upper()

    @property
    def bug_numbers(self):
        """Get the list of related bug numbers"""
        return self._definition["bug_numbers"]

    @property
    def description(self):
        """Get the scalar description"""
        return self._definition["description"]

    @property
    def expires(self):
        """Get the scalar expiration"""
        return self._expires

    @property
    def kind(self):
        """Get the scalar kind"""
        return self._definition["kind"]

    @property
    def keys(self):
        """Get the allowed keys for this scalar or [] if there aren't any"""
        return self._definition.get("keys", [])

    @property
    def keyed(self):
        """Boolean indicating whether this is a keyed scalar"""
        return self._definition.get("keyed", False)

    @property
    def nsITelemetry_kind(self):
        """Get the scalar kind constant defined in nsITelemetry"""
        return SCALAR_TYPES_MAP.get(self.kind)

    @property
    def notification_emails(self):
        """Get the list of notification emails"""
        return self._definition["notification_emails"]

    @property
    def record_in_processes(self):
        """Get the non-empty list of processes to record data in"""
        # Before we added content process support in bug 1278556, we only recorded in the
        # main process.
        return self._definition.get("record_in_processes", ["main"])

    @property
    def record_in_processes_enum(self):
        """Get the non-empty list of flags representing the processes to record data in"""
        return [utils.process_name_to_enum(p) for p in self.record_in_processes]

    @property
    def products(self):
        """Get the list of products to record data on (None if the field is absent)"""
        return self._definition.get("products")

    @property
    def products_enum(self):
        """Get the non-empty list of flags representing products to record data on"""
        return [utils.product_name_to_enum(p) for p in self.products]

    @property
    def dataset(self):
        """Get the nsITelemetry constant equivalent to the chosen release channel collection
        policy for the scalar.
        """
        rcc = self.dataset_short
        table = {
            "opt-in": "DATASET_PRERELEASE_CHANNELS",
            "opt-out": "DATASET_ALL_CHANNELS",
        }
        return "nsITelemetry::" + table[rcc]

    @property
    def dataset_short(self):
        """Get the short name of the chosen release channel collection policy for the scalar."""
        # The collection policy is optional, but we still define a default
        # behaviour for it.
        return self._definition.get("release_channel_collection", "opt-in")

    @property
    def operating_systems(self):
        """Get the list of operating systems to record data on"""
        return self._definition.get("operating_systems", ["all"])

    def record_on_os(self, target_os):
        """Check if this probe should be recorded on the passed os."""
        allowed_os = self.operating_systems
        if "all" in allowed_os:
            return True

        canonical_os = utils.canonical_os(target_os)

        if "unix" in allowed_os and canonical_os in utils.UNIX_LIKE_OS:
            return True

        return canonical_os in allowed_os

    @property
    def record_into_store(self):
        """Get the list of stores this probe should be recorded into"""
        return self._definition.get("record_into_store", ["main"])
|
|
|
|
|
|
def load_scalars(filename, strict_type_checks=True):
    """Parses a YAML file containing the scalar definition.

    :param filename: the YAML file containing the scalars definition.
    :param strict_type_checks: forwarded to ScalarType; when false, the type
           and value validation of each definition is skipped.
    :return: a list of ScalarType objects, ordered by category name and then
             by probe name. An empty YAML document yields an empty list.
    :raises ParserError: if the scalar file cannot be opened or parsed.
    """

    # Parse the scalar definitions from the YAML file.
    scalars = None
    try:
        with open(filename, encoding="utf-8") as f:
            scalars = yaml.safe_load(f)
    except OSError as e:
        ParserError(f"Error opening {filename}: {e}").handle_now()
    except (ValueError, yaml.YAMLError) as e:
        # PyYAML reports syntax errors with yaml.YAMLError, which is not a
        # ValueError; ValueError still covers e.g. UnicodeDecodeError.
        ParserError(
            f"Error parsing scalars in {filename}: {e}" f".\nSee: {BASE_DOC_URL}"
        ).handle_now()

    # safe_load() returns None for an empty document: treat it as a file
    # without scalars instead of failing on sorted(None).
    if scalars is None:
        scalars = {}

    if not isinstance(scalars, dict):
        ParserError(
            f"Error parsing scalars in {filename}: the top level must be a mapping"
            f" of categories.\nSee: {BASE_DOC_URL}"
        ).handle_now()
        # NOTE(review): handle_now() presumably does not return -- confirm in
        # shared_telemetry_utils. Bail out anyway rather than iterate bad data.
        return []

    scalar_list = []

    # Scalars are defined in a fixed two-level hierarchy within the definition file.
    # The first level contains the category name, while the second level contains the
    # probe name (e.g. "category.name: probe: ...").
    for category_name in sorted(scalars):
        category = scalars[category_name]

        # Make sure that the category has at least one probe in it.
        if not isinstance(category, dict) or not category:
            ParserError(
                f'Category "{category_name}" must have at least one probe in it'
                f".\nSee: {BASE_DOC_URL}"
            ).handle_later()
            # There are no probes to iterate over (sorted(None) would raise).
            continue

        for probe_name in sorted(category):
            # We found a scalar type. Go ahead and parse it.
            scalar_info = category[probe_name]
            scalar_list.append(
                ScalarType(category_name, probe_name, scalar_info, strict_type_checks)
            )

    return scalar_list
|