Diffstat (limited to 'toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py')
-rw-r--r--  toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py  836
1 file changed, 836 insertions, 0 deletions
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py b/toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py
new file mode 100644
index 0000000000..626188bf06
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py
@@ -0,0 +1,836 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import atexit
+import collections
+import itertools
+import json
+import math
+import os
+import re
+from collections import OrderedDict
+from ctypes import c_int
+
+from . import shared_telemetry_utils as utils
+from .shared_telemetry_utils import ParserError
+
+atexit.register(ParserError.exit_func)
+
+# Constants.
+MAX_LABEL_LENGTH = 20
+MAX_LABEL_COUNT = 100
+MAX_KEY_COUNT = 30
+MAX_KEY_LENGTH = 20
+MIN_CATEGORICAL_BUCKET_COUNT = 50
+CPP_IDENTIFIER_PATTERN = "^[a-z][a-z0-9_]+[a-z0-9]$"
+
+ALWAYS_ALLOWED_KEYS = [
+ "kind",
+ "description",
+ "operating_systems",
+ "expires_in_version",
+ "alert_emails",
+ "keyed",
+ "releaseChannelCollection",
+ "bug_numbers",
+ "keys",
+ "record_in_processes",
+ "record_into_store",
+ "products",
+]
+
+BASE_DOC_URL = (
+ "https://firefox-source-docs.mozilla.org/toolkit/components/" "telemetry/telemetry/"
+)
+HISTOGRAMS_DOC_URL = BASE_DOC_URL + "collection/histograms.html"
+SCALARS_DOC_URL = BASE_DOC_URL + "collection/scalars.html"
+
+GECKOVIEW_STREAMING_SUPPORTED_KINDS = [
+ "linear",
+ "exponential",
+ "categorical",
+]
+
+
+def linear_buckets(dmin, dmax, n_buckets):
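+    """Compute the linear bucket lower bounds for the given parameters.
+
+    For example, linear_buckets(1, 10, 5) yields [0, 1, 4, 7, 10]: bucket 0 is
+    left at 0 (values below dmin land there) and the remaining lower bounds are
+    spaced evenly between dmin and dmax.
+    """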
+ ret_array = [0] * n_buckets
+ dmin = float(dmin)
+ dmax = float(dmax)
+ for i in range(1, n_buckets):
+ linear_range = (dmin * (n_buckets - 1 - i) + dmax * (i - 1)) / (n_buckets - 2)
+ ret_array[i] = int(linear_range + 0.5)
+ return ret_array
+
+
+def exponential_buckets(dmin, dmax, n_buckets):
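+    """Compute the exponential bucket lower bounds for the given parameters.
+
+    Bucket 0 is left at 0 (values below dmin land there), bucket 1 starts at
+    dmin, and the remaining lower bounds are spaced roughly evenly on a log
+    scale up to dmax, always growing by at least 1. For example,
+    exponential_buckets(1, 100, 5) yields [0, 1, 5, 22, 100].
+    """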
+ log_max = math.log(dmax)
+ ret_array = [0] * n_buckets
+ current = dmin
+ ret_array[1] = current
+ for bucket_index in range(2, n_buckets):
+ log_current = math.log(current)
+ log_ratio = (log_max - log_current) / (n_buckets - bucket_index)
+ log_next = log_current + log_ratio
+ next_value = int(math.floor(math.exp(log_next) + 0.5))
+ if next_value > current:
+ current = next_value
+ else:
+ current = current + 1
+ ret_array[bucket_index] = current
+ return ret_array
+
+
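+# histogram-allowlists.json is expected to map check names to lists of
+# histogram names, roughly of the form:
+#   {"alert_emails": [...], "bug_numbers": [...], "n_buckets": [...],
+#    "expiry_default": [...], "kind": [...]}
+# load_allowlist() below turns each list into a set for membership checks.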
+allowlists = None
+
+
+def load_allowlist():
+ global allowlists
+ try:
+ parsers_path = os.path.realpath(os.path.dirname(__file__))
+        # The parsers live in build_scripts/mozparsers in the Telemetry module, while
+ # the histogram-allowlists file lives in the root of the module. Account
+ # for that when looking for the allowlist.
+ # NOTE: if the parsers are moved, this logic will need to be updated.
+ telemetry_module_path = os.path.abspath(
+ os.path.join(parsers_path, os.pardir, os.pardir)
+ )
+ allowlist_path = os.path.join(
+ telemetry_module_path, "histogram-allowlists.json"
+ )
+ with open(allowlist_path, "r") as f:
+ try:
+ allowlists = json.load(f)
+ for name, allowlist in allowlists.items():
+ allowlists[name] = set(allowlist)
+ except ValueError:
+ ParserError("Error parsing allowlist: %s" % allowlist_path).handle_now()
+ except IOError:
+ allowlists = None
+ ParserError("Unable to parse allowlist: %s." % allowlist_path).handle_now()
+
+
+class Histogram:
+ """A class for representing a histogram definition."""
+
+ def __init__(self, name, definition, strict_type_checks=False):
+ """Initialize a histogram named name with the given definition.
+ definition is a dict-like object that must contain at least the keys:
+
+ - 'kind': The kind of histogram. Must be one of 'boolean', 'flag',
+ 'count', 'enumerated', 'linear', or 'exponential'.
+ - 'description': A textual description of the histogram.
+ - 'strict_type_checks': A boolean indicating whether to use the new, stricter type checks.
+ The server-side still has to deal with old, oddly typed
+ submissions, so we have to skip them there by default.
+ """
+ self._strict_type_checks = strict_type_checks
+ self.verify_attributes(name, definition)
+ self._name = name
+ self._description = definition["description"]
+ self._kind = definition["kind"]
+ self._keys = definition.get("keys", [])
+ self._keyed = definition.get("keyed", False)
+ self._expiration = definition.get("expires_in_version")
+ self._labels = definition.get("labels", [])
+ self._record_in_processes = definition.get("record_in_processes")
+ self._record_into_store = definition.get("record_into_store", ["main"])
+ self._products = definition.get("products")
+ self._operating_systems = definition.get("operating_systems", ["all"])
+
+ self.compute_bucket_parameters(definition)
+ self.set_nsITelemetry_kind()
+ self.set_dataset(definition)
+
+ def name(self):
+ """Return the name of the histogram."""
+ return self._name
+
+ def description(self):
+ """Return the description of the histogram."""
+ return self._description
+
+ def kind(self):
+ """Return the kind of the histogram.
+ Will be one of 'boolean', 'flag', 'count', 'enumerated', 'categorical', 'linear',
+ or 'exponential'."""
+ return self._kind
+
+ def expiration(self):
+ """Return the expiration version of the histogram."""
+ return self._expiration
+
+ def nsITelemetry_kind(self):
+ """Return the nsITelemetry constant corresponding to the kind of
+ the histogram."""
+ return self._nsITelemetry_kind
+
+ def low(self):
+ """Return the lower bound of the histogram."""
+ return self._low
+
+ def high(self):
+ """Return the high bound of the histogram."""
+ return self._high
+
+ def n_buckets(self):
+ """Return the number of buckets in the histogram."""
+ return self._n_buckets
+
+ def keyed(self):
+ """Returns True if this a keyed histogram, false otherwise."""
+ return self._keyed
+
+ def keys(self):
+ """Returns a list of allowed keys for keyed histogram, [] for others."""
+ return self._keys
+
+ def dataset(self):
+ """Returns the dataset this histogram belongs into."""
+ return self._dataset
+
+ def labels(self):
+ """Returns a list of labels for a categorical histogram, [] for others."""
+ return self._labels
+
+ def record_in_processes(self):
+ """Returns a list of processes this histogram is permitted to record in."""
+ return self._record_in_processes
+
+ def record_in_processes_enum(self):
+ """Get the non-empty list of flags representing the processes to record data in"""
+ return [utils.process_name_to_enum(p) for p in self.record_in_processes()]
+
+ def products(self):
+ """Get the non-empty list of products to record data on"""
+ return self._products
+
+ def products_enum(self):
+ """Get the non-empty list of flags representing products to record data on"""
+ return [utils.product_name_to_enum(p) for p in self.products()]
+
+ def operating_systems(self):
+ """Get the list of operating systems to record data on"""
+ return self._operating_systems
+
+ def record_on_os(self, target_os):
+ """Check if this probe should be recorded on the passed os."""
+ os = self.operating_systems()
+ if "all" in os:
+ return True
+
+ canonical_os = utils.canonical_os(target_os)
+
+ if "unix" in os and canonical_os in utils.UNIX_LIKE_OS:
+ return True
+
+ return canonical_os in os
+
+ def record_into_store(self):
+ """Get the non-empty list of stores to record into"""
+ return self._record_into_store
+
+ def ranges(self):
+ """Return an array of lower bounds for each bucket in the histogram."""
+ bucket_fns = {
+ "boolean": linear_buckets,
+ "flag": linear_buckets,
+ "count": linear_buckets,
+ "enumerated": linear_buckets,
+ "categorical": linear_buckets,
+ "linear": linear_buckets,
+ "exponential": exponential_buckets,
+ }
+
+ if self._kind not in bucket_fns:
+ ParserError(
+ 'Unknown kind "%s" for histogram "%s".' % (self._kind, self._name)
+ ).handle_later()
+
+ fn = bucket_fns[self._kind]
+ return fn(self.low(), self.high(), self.n_buckets())
+
+ def compute_bucket_parameters(self, definition):
+ bucket_fns = {
+ "boolean": Histogram.boolean_flag_bucket_parameters,
+ "flag": Histogram.boolean_flag_bucket_parameters,
+ "count": Histogram.boolean_flag_bucket_parameters,
+ "enumerated": Histogram.enumerated_bucket_parameters,
+ "categorical": Histogram.categorical_bucket_parameters,
+ "linear": Histogram.linear_bucket_parameters,
+ "exponential": Histogram.exponential_bucket_parameters,
+ }
+
+ if self._kind not in bucket_fns:
+ ParserError(
+ 'Unknown kind "%s" for histogram "%s".' % (self._kind, self._name)
+ ).handle_later()
+
+ fn = bucket_fns[self._kind]
+ self.set_bucket_parameters(*fn(definition))
+
+ def verify_attributes(self, name, definition):
+ general_keys = ALWAYS_ALLOWED_KEYS + ["low", "high", "n_buckets"]
+
+ table = {
+ "boolean": ALWAYS_ALLOWED_KEYS,
+ "flag": ALWAYS_ALLOWED_KEYS,
+ "count": ALWAYS_ALLOWED_KEYS,
+ "enumerated": ALWAYS_ALLOWED_KEYS + ["n_values"],
+ "categorical": ALWAYS_ALLOWED_KEYS + ["labels", "n_values"],
+ "linear": general_keys,
+ "exponential": general_keys,
+ }
+ # We removed extended_statistics_ok on the client, but the server-side,
+ # where _strict_type_checks==False, has to deal with historical data.
+ if not self._strict_type_checks:
+ table["exponential"].append("extended_statistics_ok")
+
+ kind = definition["kind"]
+ if kind not in table:
+ ParserError(
+ 'Unknown kind "%s" for histogram "%s".' % (kind, name)
+ ).handle_later()
+ allowed_keys = table[kind]
+
+ self.check_name(name)
+ self.check_keys(name, definition, allowed_keys)
+ self.check_keys_field(name, definition)
+ self.check_field_types(name, definition)
+ self.check_allowlisted_kind(name, definition)
+ self.check_allowlistable_fields(name, definition)
+ self.check_expiration(name, definition)
+ self.check_label_values(name, definition)
+ self.check_record_in_processes(name, definition)
+ self.check_products(name, definition)
+ self.check_operating_systems(name, definition)
+ self.check_record_into_store(name, definition)
+
+ def check_name(self, name):
+ if "#" in name:
+ ParserError(
+ 'Error for histogram name "%s": "#" is not allowed.' % (name)
+ ).handle_later()
+
+ # Avoid C++ identifier conflicts between histogram enums and label enum names.
+ if name.startswith("LABELS_"):
+ ParserError(
+ 'Error for histogram name "%s": can not start with "LABELS_".' % (name)
+ ).handle_later()
+
+        # To make it easier to generate C++ identifiers from histogram names, we restrict
+ # the histogram names to a strict pattern.
+ # We skip this on the server to avoid failures with old Histogram.json revisions.
+ if self._strict_type_checks:
+ if not re.match(CPP_IDENTIFIER_PATTERN, name, re.IGNORECASE):
+ ParserError(
+ 'Error for histogram name "%s": name does not conform to "%s"'
+ % (name, CPP_IDENTIFIER_PATTERN)
+ ).handle_later()
+
+ def check_expiration(self, name, definition):
+ field = "expires_in_version"
+ expiration = definition.get(field)
+
+ if not expiration:
+ return
+
+        # We forbid new probes from using the "expires_in_version": "default" field/value pair.
+ # Old ones that use this are added to the allowlist.
+ if (
+ expiration == "default"
+ and allowlists is not None
+ and name not in allowlists["expiry_default"]
+ ):
+ ParserError(
+ 'New histogram "%s" cannot have "default" %s value.' % (name, field)
+ ).handle_later()
+
+        # Historical editions of Histograms.json can have the deprecated
+        # expiration format 'N.Na1'. Fortunately, the server-side scripts that
+        # parse them set self._strict_type_checks to False.
+ if (
+ expiration != "default"
+ and not utils.validate_expiration_version(expiration)
+ and self._strict_type_checks
+ ):
+ ParserError(
+ (
+ "Error for histogram {} - invalid {}: {}."
+ "\nSee: {}#expires-in-version"
+ ).format(name, field, expiration, HISTOGRAMS_DOC_URL)
+ ).handle_later()
+
+ expiration = utils.add_expiration_postfix(expiration)
+
+ definition[field] = expiration
+
+ def check_label_values(self, name, definition):
+ labels = definition.get("labels")
+ if not labels:
+ return
+
+        invalid = list(filter(lambda l: len(l) > MAX_LABEL_LENGTH, labels))
+        if len(invalid) > 0:
+ ParserError(
+ 'Label values for "%s" exceed length limit of %d: %s'
+ % (name, MAX_LABEL_LENGTH, ", ".join(invalid))
+ ).handle_later()
+
+ if len(labels) > MAX_LABEL_COUNT:
+ ParserError(
+ 'Label count for "%s" exceeds limit of %d' % (name, MAX_LABEL_COUNT)
+ ).handle_now()
+
+        # To make it easier to generate C++ identifiers from label values, we restrict
+ # the label values to a strict pattern.
+        invalid = list(
+            filter(
+                lambda l: not re.match(CPP_IDENTIFIER_PATTERN, l, re.IGNORECASE),
+                labels,
+            )
+        )
+        if len(invalid) > 0:
+            ParserError(
+                'Label values for %s do not match pattern "%s": %s'
+                % (name, CPP_IDENTIFIER_PATTERN, ", ".join(invalid))
+            ).handle_later()
+
+ def check_record_in_processes(self, name, definition):
+ if not self._strict_type_checks:
+ return
+
+ field = "record_in_processes"
+ rip = definition.get(field)
+
+ DOC_URL = HISTOGRAMS_DOC_URL + "#record-in-processes"
+
+ if not rip:
+ ParserError(
+ 'Histogram "%s" must have a "%s" field:\n%s' % (name, field, DOC_URL)
+ ).handle_later()
+
+ for process in rip:
+ if not utils.is_valid_process_name(process):
+ ParserError(
+ 'Histogram "%s" has unknown process "%s" in %s.\n%s'
+ % (name, process, field, DOC_URL)
+ ).handle_later()
+
+ def check_products(self, name, definition):
+ if not self._strict_type_checks:
+ return
+
+ field = "products"
+ products = definition.get(field)
+
+ DOC_URL = HISTOGRAMS_DOC_URL + "#products"
+
+ if not products:
+ ParserError(
+ 'Histogram "%s" must have a "%s" field:\n%s' % (name, field, DOC_URL)
+ ).handle_now()
+
+ for product in products:
+ if not utils.is_valid_product(product):
+ ParserError(
+ 'Histogram "%s" has unknown product "%s" in %s.\n%s'
+ % (name, product, field, DOC_URL)
+ ).handle_later()
+ if utils.is_geckoview_streaming_product(product):
+ kind = definition.get("kind")
+ if kind not in GECKOVIEW_STREAMING_SUPPORTED_KINDS:
+ ParserError(
+ (
+ 'Histogram "%s" is of kind "%s" which is unsupported for '
+ 'product "%s".'
+ )
+ % (name, kind, product)
+ ).handle_later()
+ keyed = definition.get("keyed")
+ if keyed:
+ ParserError(
+ 'Keyed histograms like "%s" are unsupported for product "%s"'
+ % (name, product)
+ ).handle_later()
+
+ def check_operating_systems(self, name, definition):
+ if not self._strict_type_checks:
+ return
+
+ field = "operating_systems"
+ operating_systems = definition.get(field)
+
+ DOC_URL = HISTOGRAMS_DOC_URL + "#operating-systems"
+
+ if not operating_systems:
+ # operating_systems is optional
+ return
+
+ for operating_system in operating_systems:
+ if not utils.is_valid_os(operating_system):
+ ParserError(
+ 'Histogram "%s" has unknown operating system "%s" in %s.\n%s'
+ % (name, operating_system, field, DOC_URL)
+ ).handle_later()
+
+ def check_record_into_store(self, name, definition):
+ if not self._strict_type_checks:
+ return
+
+ field = "record_into_store"
+ DOC_URL = HISTOGRAMS_DOC_URL + "#record-into-store"
+
+ if field not in definition:
+ # record_into_store is optional
+ return
+
+ record_into_store = definition.get(field)
+ # record_into_store should not be empty
+ if not record_into_store:
+ ParserError(
+ 'Histogram "%s" has empty list of stores, which is not allowed.\n%s'
+ % (name, DOC_URL)
+ ).handle_later()
+
+ def check_keys_field(self, name, definition):
+ keys = definition.get("keys")
+ if not self._strict_type_checks or keys is None:
+ return
+
+ if not definition.get("keyed", False):
+ raise ValueError(
+ "'keys' field is not valid for %s; only allowed for keyed histograms."
+ % (name)
+ )
+
+ if len(keys) == 0:
+ raise ValueError("The key list for %s cannot be empty" % (name))
+
+ if len(keys) > MAX_KEY_COUNT:
+ raise ValueError(
+ "Label count for %s exceeds limit of %d" % (name, MAX_KEY_COUNT)
+ )
+
+        invalid = list(filter(lambda k: len(k) > MAX_KEY_LENGTH, keys))
+        if len(invalid) > 0:
+            raise ValueError(
+                '"keys" values for %s exceed the length limit of %d: %s'
+                % (name, MAX_KEY_LENGTH, ", ".join(invalid))
+            )
+
+ def check_allowlisted_kind(self, name, definition):
+ # We don't need to run any of these checks on the server.
+ if not self._strict_type_checks or allowlists is None:
+ return
+
+ # Disallow "flag" and "count" histograms on desktop, suggest to use
+ # scalars instead. Allow using these histograms on Android, as we
+ # don't support scalars there yet.
+ hist_kind = definition.get("kind")
+ android_target = "android" in definition.get("operating_systems", [])
+
+ if (
+ not android_target
+ and hist_kind in ["flag", "count"]
+ and name not in allowlists["kind"]
+ ):
+ ParserError(
+ (
+ 'Unsupported kind "%s" for histogram "%s":\n'
+ 'New "%s" histograms are not supported on Desktop, you should'
+ " use scalars instead:\n"
+ "%s\n"
+ "Are you trying to add a histogram on Android?"
+ ' Add "operating_systems": ["android"] to your histogram definition.'
+ )
+ % (hist_kind, name, hist_kind, SCALARS_DOC_URL)
+ ).handle_now()
+
+ # Check for the presence of fields that old histograms are allowlisted for.
+ def check_allowlistable_fields(self, name, definition):
+ # We don't need to run any of these checks on the server.
+ if not self._strict_type_checks:
+ return
+
+ # In the pipeline we don't have allowlists available.
+ if allowlists is None:
+ return
+
+ for field in ["alert_emails", "bug_numbers"]:
+ if field not in definition and name not in allowlists[field]:
+ ParserError(
+ 'New histogram "%s" must have a "%s" field.' % (name, field)
+ ).handle_later()
+ if field in definition and name in allowlists[field]:
+ msg = (
+ 'Histogram "%s" should be removed from the allowlist for "%s" in '
+ "histogram-allowlists.json."
+ )
+ ParserError(msg % (name, field)).handle_later()
+
+ def check_field_types(self, name, definition):
+ # Define expected types for the histogram properties.
+ type_checked_fields = {
+ "n_buckets": int,
+ "n_values": int,
+ "low": int,
+ "high": int,
+ "keyed": bool,
+ "expires_in_version": str,
+ "kind": str,
+ "description": str,
+ "releaseChannelCollection": str,
+ }
+
+ # For list fields we check the items types.
+ type_checked_list_fields = {
+ "bug_numbers": int,
+ "alert_emails": str,
+ "labels": str,
+ "record_in_processes": str,
+ "keys": str,
+ "products": str,
+ "operating_systems": str,
+ "record_into_store": str,
+ }
+
+ # For the server-side, where _strict_type_checks==False, we want to
+ # skip the stricter type checks for these fields for dealing with
+ # historical data.
+ coerce_fields = ["low", "high", "n_values", "n_buckets"]
+ if not self._strict_type_checks:
+ # This handles some old non-numeric expressions.
+ EXPRESSIONS = {
+ "JS::GCReason::NUM_TELEMETRY_REASONS": 101,
+ "mozilla::StartupTimeline::MAX_EVENT_ID": 12,
+ }
+
+ def try_to_coerce_to_number(v):
+ if v in EXPRESSIONS:
+ return EXPRESSIONS[v]
+ try:
+ return eval(v, {})
+ except Exception:
+ return v
+
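+            # For illustration: "JS::GCReason::NUM_TELEMETRY_REASONS" maps to
+            # 101 via EXPRESSIONS, a numeric string such as "2 * 25" (a made-up
+            # example) evaluates to 50, and anything that cannot be evaluated
+            # is left unchanged.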
+ for key in [k for k in coerce_fields if k in definition]:
+ definition[key] = try_to_coerce_to_number(definition[key])
+ # This handles old "keyed":"true" definitions (bug 1271986).
+ if definition.get("keyed", None) == "true":
+ definition["keyed"] = True
+
+ def nice_type_name(t):
+ if t is str:
+ return "string"
+ return t.__name__
+
+ for key, key_type in type_checked_fields.items():
+ if key not in definition:
+ continue
+ if not isinstance(definition[key], key_type):
+ ParserError(
+ 'Value for key "{0}" in histogram "{1}" should be {2}.'.format(
+ key, name, nice_type_name(key_type)
+ )
+ ).handle_later()
+
+        # Make sure the max range is lower than or equal to INT_MAX.
+        if "high" in definition and not c_int(definition["high"]).value > 0:
+            ParserError(
+                'Value for high in histogram "{0}" should be lower than or equal to INT_MAX.'.format(
+                    name
+                )
+            ).handle_later()
+
+ for key, key_type in type_checked_list_fields.items():
+ if key not in definition:
+ continue
+ if not all(isinstance(x, key_type) for x in definition[key]):
+ ParserError(
+ 'All values for list "{0}" in histogram "{1}" should be of type'
+ " {2}.".format(key, name, nice_type_name(key_type))
+ ).handle_later()
+
+ def check_keys(self, name, definition, allowed_keys):
+ if not self._strict_type_checks:
+ return
+ for key in iter(definition.keys()):
+ if key not in allowed_keys:
+ ParserError(
+ 'Key "%s" is not allowed for histogram "%s".' % (key, name)
+ ).handle_later()
+
+ def set_bucket_parameters(self, low, high, n_buckets):
+ self._low = low
+ self._high = high
+ self._n_buckets = n_buckets
+ max_n_buckets = 101 if self._kind in ["enumerated", "categorical"] else 100
+ if (
+ allowlists is not None
+ and self._n_buckets > max_n_buckets
+ and type(self._n_buckets) is int
+ ):
+ if self._name not in allowlists["n_buckets"]:
+ ParserError(
+ 'New histogram "%s" is not permitted to have more than 100 buckets.\n'
+ "Histograms with large numbers of buckets use disproportionately high"
+ " amounts of resources. Contact a Telemetry peer (e.g. in #telemetry)"
+ " if you think an exception ought to be made:\n"
+ "https://wiki.mozilla.org/Modules/Toolkit#Telemetry" % self._name
+ ).handle_later()
+
+ @staticmethod
+ def boolean_flag_bucket_parameters(definition):
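+        # With (low, high, n_buckets) == (1, 2, 3), ranges() computes the
+        # bucket lower bounds [0, 1, 2].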
+ return (1, 2, 3)
+
+ @staticmethod
+ def linear_bucket_parameters(definition):
+ return (definition.get("low", 1), definition["high"], definition["n_buckets"])
+
+ @staticmethod
+ def enumerated_bucket_parameters(definition):
+ n_values = definition["n_values"]
+ return (1, n_values, n_values + 1)
+
+ @staticmethod
+ def categorical_bucket_parameters(definition):
+ # Categorical histograms default to 50 buckets to make working with them easier.
+ # Otherwise when adding labels later we run into problems with the pipeline not
+ # supporting bucket changes.
+ # This can be overridden using the n_values field.
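+        # For example, a histogram with 3 labels and no explicit n_values gets
+        # n_values = 50 and bucket parameters (1, 50, 51).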
+ n_values = max(
+ len(definition["labels"]),
+ definition.get("n_values", 0),
+ MIN_CATEGORICAL_BUCKET_COUNT,
+ )
+ return (1, n_values, n_values + 1)
+
+ @staticmethod
+ def exponential_bucket_parameters(definition):
+ return (definition.get("low", 1), definition["high"], definition["n_buckets"])
+
+ def set_nsITelemetry_kind(self):
+ # Pick a Telemetry implementation type.
+ types = {
+ "boolean": "BOOLEAN",
+ "flag": "FLAG",
+ "count": "COUNT",
+ "enumerated": "LINEAR",
+ "categorical": "CATEGORICAL",
+ "linear": "LINEAR",
+ "exponential": "EXPONENTIAL",
+ }
+
+ if self._kind not in types:
+ ParserError(
+ 'Unknown kind "%s" for histogram "%s".' % (self._kind, self._name)
+ ).handle_later()
+
+ self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_%s" % types[self._kind]
+
+ def set_dataset(self, definition):
+ datasets = {
+ "opt-in": "DATASET_PRERELEASE_CHANNELS",
+ "opt-out": "DATASET_ALL_CHANNELS",
+ }
+
+ value = definition.get("releaseChannelCollection", "opt-in")
+ if value not in datasets:
+ ParserError(
+ "Unknown value for releaseChannelCollection"
+ ' policy for histogram "%s".' % self._name
+ ).handle_later()
+
+ self._dataset = "nsITelemetry::" + datasets[value]
+
+
+# This hook function loads the histograms into an OrderedDict.
+# It reports a ParserError if duplicate keys are found.
+def load_histograms_into_dict(ordered_pairs, strict_type_checks):
+ d = collections.OrderedDict()
+ for key, value in ordered_pairs:
+ if strict_type_checks and key in d:
+ ParserError(
+ "Found duplicate key in Histograms file: %s" % key
+ ).handle_later()
+ d[key] = value
+ return d
+
+
+# We support generating histograms from multiple different input files, not
+# just Histograms.json. For each file's basename, we have a specific
+# routine to parse that file, and return a dictionary mapping histogram
+# names to histogram parameters.
+def from_json(filename, strict_type_checks):
+ with open(filename, "r") as f:
+ try:
+
+ def hook(ps):
+ return load_histograms_into_dict(ps, strict_type_checks)
+
+ histograms = json.load(f, object_pairs_hook=hook)
+ except ValueError as e:
+ ParserError(
+ "error parsing histograms in %s: %s" % (filename, e)
+ ).handle_now()
+ return histograms
+
+
+def to_camel_case(property_name):
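+    """Convert a snake_case or kebab-case property name to CamelCase.
+
+    For example, to_camel_case("record_in_processes") returns "RecordInProcesses".
+    """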
+ return re.sub(
+ "(^|_|-)([a-z0-9])",
+ lambda m: m.group(2).upper(),
+ property_name.strip("_").strip("-"),
+ )
+
+
+FILENAME_PARSERS = [
+ (lambda x: from_json if x.endswith(".json") else None),
+]
+
+
+def from_files(filenames, strict_type_checks=True):
+ """Return an iterator that provides a sequence of Histograms for
+ the histograms defined in filenames.
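+
+    A rough usage sketch (the filename here is only illustrative):
+
+        for histogram in from_files(["Histograms.json"]):
+            print(histogram.name(), histogram.nsITelemetry_kind())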
+ """
+ if strict_type_checks:
+ load_allowlist()
+
+ all_histograms = OrderedDict()
+ for filename in filenames:
+ parser = None
+ for checkFn in FILENAME_PARSERS:
+ parser = checkFn(os.path.basename(filename))
+ if parser is not None:
+ break
+
+ if parser is None:
+ ParserError("Don't know how to parse %s." % filename).handle_now()
+
+ histograms = parser(filename, strict_type_checks)
+
+ # OrderedDicts are important, because then the iteration order over
+ # the parsed histograms is stable, which makes the insertion into
+ # all_histograms stable, which makes ordering in generated files
+ # stable, which makes builds more deterministic.
+ if not isinstance(histograms, OrderedDict):
+ ParserError("Histogram parser did not provide an OrderedDict.").handle_now()
+
+ for name, definition in histograms.items():
+ if name in all_histograms:
+ ParserError('Duplicate histogram name "%s".' % name).handle_later()
+ all_histograms[name] = definition
+
+ # Check that histograms that were removed from Histograms.json etc.
+ # are also removed from the allowlists.
+ if allowlists is not None:
+ all_allowlist_entries = itertools.chain.from_iterable(iter(allowlists.values()))
+ orphaned = set(all_allowlist_entries) - set(all_histograms.keys())
+ if len(orphaned) > 0:
+ msg = (
+ "The following entries are orphaned and should be removed from "
+ "histogram-allowlists.json:\n%s"
+ )
+ ParserError(msg % (", ".join(sorted(orphaned)))).handle_later()
+
+ for name, definition in all_histograms.items():
+ yield Histogram(name, definition, strict_type_checks=strict_type_checks)