author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 19:33:14 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 19:33:14 +0000
commit     36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree       105e8c98ddea1c1e4784a60a5a6410fa416be2de /toolkit/components/telemetry/build_scripts
parent     Initial commit. (diff)
download   firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
           firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr. (tag: upstream/115.7.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'toolkit/components/telemetry/build_scripts')
-rw-r--r--  toolkit/components/telemetry/build_scripts/README.md                              |    5
-rw-r--r--  toolkit/components/telemetry/build_scripts/gen_event_data.py                      |  227
-rw-r--r--  toolkit/components/telemetry/build_scripts/gen_event_enum.py                      |   81
-rw-r--r--  toolkit/components/telemetry/build_scripts/gen_histogram_data.py                  |  297
-rw-r--r--  toolkit/components/telemetry/build_scripts/gen_histogram_enum.py                  |  147
-rw-r--r--  toolkit/components/telemetry/build_scripts/gen_histogram_phf.py                   |   73
-rw-r--r--  toolkit/components/telemetry/build_scripts/gen_process_data.py                    |   80
-rw-r--r--  toolkit/components/telemetry/build_scripts/gen_process_enum.py                    |   69
-rw-r--r--  toolkit/components/telemetry/build_scripts/gen_scalar_data.py                     |  216
-rw-r--r--  toolkit/components/telemetry/build_scripts/gen_scalar_enum.py                     |   60
-rw-r--r--  toolkit/components/telemetry/build_scripts/gen_userinteraction_data.py            |  105
-rw-r--r--  toolkit/components/telemetry/build_scripts/gen_userinteraction_phf.py             |   70
-rw-r--r--  toolkit/components/telemetry/build_scripts/mozparsers/__init__.py                 |    3
-rw-r--r--  toolkit/components/telemetry/build_scripts/mozparsers/parse_events.py             |  477
-rw-r--r--  toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py         | 1009
-rw-r--r--  toolkit/components/telemetry/build_scripts/mozparsers/parse_scalars.py            |  503
-rw-r--r--  toolkit/components/telemetry/build_scripts/mozparsers/parse_user_interactions.py  |  256
-rw-r--r--  toolkit/components/telemetry/build_scripts/mozparsers/shared_telemetry_utils.py   |  185
-rw-r--r--  toolkit/components/telemetry/build_scripts/run_glean_parser.py                    |   17
-rw-r--r--  toolkit/components/telemetry/build_scripts/setup.py                               |   32
20 files changed, 3912 insertions, 0 deletions
diff --git a/toolkit/components/telemetry/build_scripts/README.md b/toolkit/components/telemetry/build_scripts/README.md
new file mode 100644
index 0000000000..4823580735
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/README.md
@@ -0,0 +1,5 @@
+# Telemetry Registries Parsers
+This package exports the parsers for Mozilla's probe registries. These registry files contain the definitions for the different probes (i.e. [scalars](https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/telemetry/collection/scalars.html), [histograms](https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/telemetry/collection/histograms.html) and [events](https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/telemetry/collection/events.html)) that can be used to collect data.
+
+# License
+Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
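For orientation, a minimal usage sketch of the exported parsers (assuming the package is importable and a registry file is available locally; "Histograms.json" is an illustrative path, not something shipped with this package):

    # Sketch: load a histogram registry and list the parsed probes.
    from mozparsers import parse_histograms

    histograms = list(parse_histograms.from_files(["Histograms.json"]))
    for h in histograms:
        print(h.name(), h.kind())

parse_histograms.from_files and the name()/kind() accessors are used in exactly this way by the generator scripts added below.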
diff --git a/toolkit/components/telemetry/build_scripts/gen_event_data.py b/toolkit/components/telemetry/build_scripts/gen_event_data.py
new file mode 100644
index 0000000000..2e321cea72
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_event_data.py
@@ -0,0 +1,227 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out event information for C++. The events are defined
+# in a file provided as a command-line argument.
+
+import itertools
+import json
+import sys
+from collections import OrderedDict
+from os import path
+
+from mozparsers import parse_events
+from mozparsers.shared_telemetry_utils import ParserError, static_assert
+
+COMPONENTS_PATH = path.abspath(
+ path.join(path.dirname(__file__), path.pardir, path.pardir)
+)
+sys.path.append(
+ path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext")
+)
+from string_table import StringTable
+
+# The banner/text at the top of the generated file.
+banner = """/* This file is auto-generated, only for internal use in TelemetryEvent.h,
+ see gen_event_data.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryEventData_h
+#define mozilla_TelemetryEventData_h
+#include "core/EventInfo.h"
+#include "nsITelemetry.h"
+namespace {
+"""
+
+file_footer = """\
+} // namespace
+#endif // mozilla_TelemetryEventData_h
+"""
+
+
+def write_extra_table(events, output, string_table):
+ table_name = "gExtraKeysTable"
+ extra_table = []
+ extra_count = 0
+
+ print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+ print("const uint32_t %s[] = {" % table_name, file=output)
+ print("#else", file=output)
+ print("constexpr uint32_t %s[] = {" % table_name, file=output)
+ print("#endif", file=output)
+
+ for e in events:
+ extra_index = 0
+ extra_keys = e.extra_keys
+ if len(extra_keys) > 0:
+ extra_index = extra_count
+ extra_count += len(extra_keys)
+ indexes = string_table.stringIndexes(extra_keys)
+
+ print(
+ " // %s, [%s], [%s]"
+ % (e.category, ", ".join(e.methods), ", ".join(e.objects)),
+ file=output,
+ )
+ print(" // extra_keys: %s" % ", ".join(extra_keys), file=output)
+ print(" %s," % ", ".join(map(str, indexes)), file=output)
+
+ extra_table.append((extra_index, len(extra_keys)))
+
+ print("};", file=output)
+ static_assert(output, "sizeof(%s) <= UINT32_MAX" % table_name, "index overflow")
+
+ return extra_table
+
+
+def write_common_event_table(events, output, string_table, extra_table):
+ table_name = "gCommonEventInfo"
+
+ print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+ print("const CommonEventInfo %s[] = {" % table_name, file=output)
+ print("#else", file=output)
+ print("constexpr CommonEventInfo %s[] = {" % table_name, file=output)
+ print("#endif", file=output)
+
+ for e, extras in zip(events, extra_table):
+ # Write a comment to make the file human-readable.
+ print(" // category: %s" % e.category, file=output)
+ print(" // methods: [%s]" % ", ".join(e.methods), file=output)
+ print(" // objects: [%s]" % ", ".join(e.objects), file=output)
+
+ # Write the common info structure
+ print(
+ " {%d, %d, %d, %d, %s, %s, %s },"
+ % (
+ string_table.stringIndex(e.category),
+ string_table.stringIndex(e.expiry_version),
+ extras[0], # extra keys index
+ extras[1], # extra keys count
+ e.dataset,
+ " | ".join(e.record_in_processes_enum),
+ " | ".join(e.products_enum),
+ ),
+ file=output,
+ )
+
+ print("};", file=output)
+ static_assert(output, "sizeof(%s) <= UINT32_MAX" % table_name, "index overflow")
+
+
+def write_event_table(events, output, string_table):
+ table_name = "gEventInfo"
+
+ print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+ print("const EventInfo %s[] = {" % table_name, file=output)
+ print("#else", file=output)
+ print("constexpr EventInfo %s[] = {" % table_name, file=output)
+ print("#endif", file=output)
+
+ for common_info_index, e in enumerate(events):
+ for method_name, object_name in itertools.product(e.methods, e.objects):
+ print(
+ " // category: %s, method: %s, object: %s"
+ % (e.category, method_name, object_name),
+ file=output,
+ )
+
+ print(
+ " {gCommonEventInfo[%d], %d, %d},"
+ % (
+ common_info_index,
+ string_table.stringIndex(method_name),
+ string_table.stringIndex(object_name),
+ ),
+ file=output,
+ )
+
+ print("};", file=output)
+ static_assert(output, "sizeof(%s) <= UINT32_MAX" % table_name, "index overflow")
+
+
+def generate_JSON_definitions(output, *filenames):
+ """Write the event definitions to a JSON file.
+
+ :param output: the file to write the content to.
+ :param filenames: a list of filenames provided by the build system.
+ We only support a single file.
+ """
+ # Load the event data.
+ events = []
+ for filename in filenames:
+ try:
+ batch = parse_events.load_events(filename, True)
+ events.extend(batch)
+ except ParserError as ex:
+ print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+ sys.exit(1)
+
+ event_definitions = OrderedDict()
+ for event in events:
+ category = event.category
+
+ if category not in event_definitions:
+ event_definitions[category] = OrderedDict()
+
+ event_definitions[category][event.name] = OrderedDict(
+ {
+ "methods": event.methods,
+ "objects": event.objects,
+ "extra_keys": event.extra_keys,
+ "record_on_release": True
+ if event.dataset_short == "opt-out"
+ else False,
+            # We don't expire dynamic-builtin events: they're only meant for
+            # use in local developer builds anyway. They will expire when rebuilding.
+ "expires": event.expiry_version,
+ "expired": False,
+ "products": event.products,
+ }
+ )
+
+ json.dump(event_definitions, output, sort_keys=True)
+
+
+def main(output, *filenames):
+ # Load the event data.
+ events = []
+ for filename in filenames:
+ try:
+ batch = parse_events.load_events(filename, True)
+ events.extend(batch)
+ except ParserError as ex:
+ print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+ sys.exit(1)
+
+ # Write the scalar data file.
+ print(banner, file=output)
+ print(file_header, file=output)
+
+ # Write the extra keys table.
+ string_table = StringTable()
+ extra_table = write_extra_table(events, output, string_table)
+ print("", file=output)
+
+ # Write a table with the common event data.
+ write_common_event_table(events, output, string_table, extra_table)
+ print("", file=output)
+
+ # Write the data for individual events.
+ write_event_table(events, output, string_table)
+ print("", file=output)
+
+ # Write the string table.
+ string_table_name = "gEventsStringTable"
+ string_table.writeDefinition(output, string_table_name)
+ static_assert(
+ output, "sizeof(%s) <= UINT32_MAX" % string_table_name, "index overflow"
+ )
+ print("", file=output)
+
+ print(file_footer, file=output)
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
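gen_event_data.py interns every category, expiry, and extra-key string through the shared StringTable helper from glean's build scripts, and the generated C++ tables refer to strings by byte offset. A minimal standalone sketch of that interning idea (this is not the in-tree StringTable implementation):

    # Sketch of offset-based string interning, mirroring how the generated
    # tables reference strings; not the in-tree StringTable.
    class TinyStringTable:
        def __init__(self):
            self.table = {}  # string -> byte offset in the eventual char array
            self.size = 0    # total bytes so far, counting NUL terminators

        def string_index(self, s):
            if s not in self.table:
                self.table[s] = self.size
                self.size += len(s) + 1  # each entry is NUL-terminated
            return self.table[s]

    t = TinyStringTable()
    assert t.string_index("session") == 0
    assert t.string_index("restore") == 8  # "session\0" occupies bytes 0..7
    assert t.string_index("session") == 0  # duplicates reuse the first offset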
diff --git a/toolkit/components/telemetry/build_scripts/gen_event_enum.py b/toolkit/components/telemetry/build_scripts/gen_event_enum.py
new file mode 100644
index 0000000000..9dd418b3dd
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_event_enum.py
@@ -0,0 +1,81 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out C++ enum definitions that represent the different event types.
+#
+# The events are defined in files provided as command-line arguments.
+
+import sys
+
+import buildconfig
+from mozparsers import parse_events
+from mozparsers.shared_telemetry_utils import ParserError
+
+banner = """/* This file is auto-generated, see gen_event_enum.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryEventEnums_h
+#define mozilla_TelemetryEventEnums_h
+
+#include <stdint.h>
+
+namespace mozilla {
+namespace Telemetry {
+enum class EventID : uint32_t {\
+"""
+
+file_footer = """\
+};
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryEventEnums_h
+"""
+
+
+def main(output, *filenames):
+ # Load the events first.
+ events = []
+ for filename in filenames:
+ try:
+ batch = parse_events.load_events(filename, True)
+ events.extend(batch)
+ except ParserError as ex:
+ print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+ sys.exit(1)
+
+ grouped = dict()
+ index = 0
+ for e in events:
+ category = e.category
+ if category not in grouped:
+ grouped[category] = []
+ grouped[category].append((index, e))
+ index += len(e.enum_labels)
+
+ # Write the enum file.
+ print(banner, file=output)
+ print(file_header, file=output)
+
+ for category, indexed in sorted(grouped.items()):
+ category_cpp = indexed[0][1].category_cpp
+
+ print(" // category: %s" % category, file=output)
+
+ for event_index, e in indexed:
+ if e.record_on_os(buildconfig.substs["OS_TARGET"]):
+ for offset, label in enumerate(e.enum_labels):
+ print(
+ " %s_%s = %d," % (category_cpp, label, event_index + offset),
+ file=output,
+ )
+
+ print(" // meta", file=output)
+ print(" EventCount = %d," % index, file=output)
+
+ print(file_footer, file=output)
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
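Because each event expands into one enum label per (method, object) pair, the running index above advances by len(e.enum_labels) even for events not emitted on the target OS, which keeps IDs stable across platforms. A small sketch of that layout, with illustrative event definitions:

    # Sketch of the contiguous enum layout: every (method, object) pair of an
    # event gets its own ID, and the running index always advances.
    import itertools

    events = [  # (category, methods, objects) -- illustrative definitions
        ("navigation", ["search"], ["urlbar", "searchbar"]),
        ("pictureinpicture", ["create"], ["player"]),
    ]

    index = 0
    for category, methods, objects in events:
        labels = ["%s_%s" % (m, o) for m, o in itertools.product(methods, objects)]
        for offset, label in enumerate(labels):
            print("  %s_%s = %d," % (category, label, index + offset))
        index += len(labels)
    print("  EventCount = %d," % index)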
diff --git a/toolkit/components/telemetry/build_scripts/gen_histogram_data.py b/toolkit/components/telemetry/build_scripts/gen_histogram_data.py
new file mode 100644
index 0000000000..a203dde9f9
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_histogram_data.py
@@ -0,0 +1,297 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out histogram information for C++. The histograms are defined
+# in a file provided as a command-line argument.
+
+import sys
+from os import path
+
+import buildconfig
+from mozparsers import parse_histograms
+from mozparsers.shared_telemetry_utils import ParserError, static_assert
+
+COMPONENTS_PATH = path.abspath(
+ path.join(path.dirname(__file__), path.pardir, path.pardir)
+)
+sys.path.append(
+ path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext")
+)
+from string_table import StringTable
+
+banner = """/* This file is auto-generated, see gen_histogram_data.py. */
+"""
+
+
+def print_array_entry(
+ output,
+ histogram,
+ name_index,
+ exp_index,
+ label_index,
+ label_count,
+ key_index,
+ key_count,
+ store_index,
+ store_count,
+):
+ if histogram.record_on_os(buildconfig.substs["OS_TARGET"]):
+ print(
+ " { %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %s, %s, %s, %s, %s, %s },"
+ % (
+ histogram.low(),
+ histogram.high(),
+ histogram.n_buckets(),
+ name_index,
+ exp_index,
+ label_count,
+ key_count,
+ store_count,
+ label_index,
+ key_index,
+ store_index,
+ " | ".join(histogram.record_in_processes_enum()),
+ "true" if histogram.keyed() else "false",
+ histogram.nsITelemetry_kind(),
+ histogram.dataset(),
+ " | ".join(histogram.products_enum()),
+ ),
+ file=output,
+ )
+
+
+def write_histogram_table(output, histograms):
+ string_table = StringTable()
+
+ label_table = []
+ label_count = 0
+ keys_table = []
+ keys_count = 0
+ store_table = []
+ total_store_count = 0
+
+ print("constexpr HistogramInfo gHistogramInfos[] = {", file=output)
+ for histogram in histograms:
+ name_index = string_table.stringIndex(histogram.name())
+ exp_index = string_table.stringIndex(histogram.expiration())
+
+ labels = histogram.labels()
+ label_index = 0
+ if len(labels) > 0:
+ label_index = label_count
+ label_table.append((histogram.name(), string_table.stringIndexes(labels)))
+ label_count += len(labels)
+
+ keys = histogram.keys()
+ key_index = 0
+ if len(keys) > 0:
+ key_index = keys_count
+ keys_table.append((histogram.name(), string_table.stringIndexes(keys)))
+ keys_count += len(keys)
+
+ stores = histogram.record_into_store()
+ store_index = 0
+ if stores == ["main"]:
+ # if count == 1 && offset == UINT16_MAX -> only main store
+ store_index = "UINT16_MAX"
+ else:
+ store_index = total_store_count
+ store_table.append((histogram.name(), string_table.stringIndexes(stores)))
+ total_store_count += len(stores)
+
+ print_array_entry(
+ output,
+ histogram,
+ name_index,
+ exp_index,
+ label_index,
+ len(labels),
+ key_index,
+ len(keys),
+ store_index,
+ len(stores),
+ )
+ print("};\n", file=output)
+
+ strtab_name = "gHistogramStringTable"
+ string_table.writeDefinition(output, strtab_name)
+ static_assert(output, "sizeof(%s) <= UINT32_MAX" % strtab_name, "index overflow")
+
+ print("\n#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+ print("const uint32_t gHistogramLabelTable[] = {", file=output)
+ print("#else", file=output)
+ print("constexpr uint32_t gHistogramLabelTable[] = {", file=output)
+ print("#endif", file=output)
+ for name, indexes in label_table:
+ print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output)
+ print("};", file=output)
+ static_assert(
+ output, "sizeof(gHistogramLabelTable) <= UINT16_MAX", "index overflow"
+ )
+
+ print("\n#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+ print("const uint32_t gHistogramKeyTable[] = {", file=output)
+ print("#else", file=output)
+ print("constexpr uint32_t gHistogramKeyTable[] = {", file=output)
+ print("#endif", file=output)
+ for name, indexes in keys_table:
+ print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output)
+ print("};", file=output)
+ static_assert(output, "sizeof(gHistogramKeyTable) <= UINT16_MAX", "index overflow")
+
+ store_table_name = "gHistogramStoresTable"
+ print("\n#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+ print("const uint32_t {}[] = {{".format(store_table_name), file=output)
+ print("#else", file=output)
+ print("constexpr uint32_t {}[] = {{".format(store_table_name), file=output)
+ print("#endif", file=output)
+ for name, indexes in store_table:
+ print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output)
+ print("};", file=output)
+ static_assert(
+ output, "sizeof(%s) <= UINT16_MAX" % store_table_name, "index overflow"
+ )
+
+
+# Write out static asserts for histogram data. We'd prefer to perform
+# these checks in this script itself, but since several histograms
+# (generally enumerated histograms) use compile-time constants for
+# their upper bounds, we have to let the compiler do the checking.
+
+
+def static_asserts_for_boolean(output, histogram):
+ pass
+
+
+def static_asserts_for_flag(output, histogram):
+ pass
+
+
+def static_asserts_for_count(output, histogram):
+ pass
+
+
+def static_asserts_for_enumerated(output, histogram):
+ n_values = histogram.high()
+ static_assert(
+ output, "%s > 2" % n_values, "Not enough values for %s" % histogram.name()
+ )
+
+
+def shared_static_asserts(output, histogram):
+ name = histogram.name()
+ low = histogram.low()
+ high = histogram.high()
+ n_buckets = histogram.n_buckets()
+ static_assert(output, "%s < %s" % (low, high), "low >= high for %s" % name)
+ static_assert(output, "%s > 2" % n_buckets, "Not enough values for %s" % name)
+ static_assert(output, "%s >= 1" % low, "Incorrect low value for %s" % name)
+ static_assert(
+ output,
+ "%s > %s" % (high, n_buckets),
+ "high must be > number of buckets for %s;"
+ " you may want an enumerated histogram" % name,
+ )
+
+
+def static_asserts_for_linear(output, histogram):
+ shared_static_asserts(output, histogram)
+
+
+def static_asserts_for_exponential(output, histogram):
+ shared_static_asserts(output, histogram)
+
+
+def write_histogram_static_asserts(output, histograms):
+ print(
+ """
+// Perform the checks at the beginning of HistogramGet at
+// compile time, so that incorrect histogram definitions
+// give compile-time errors, not runtime errors.""",
+ file=output,
+ )
+
+ table = {
+ "boolean": static_asserts_for_boolean,
+ "flag": static_asserts_for_flag,
+ "count": static_asserts_for_count,
+ "enumerated": static_asserts_for_enumerated,
+ "categorical": static_asserts_for_enumerated,
+ "linear": static_asserts_for_linear,
+ "exponential": static_asserts_for_exponential,
+ }
+
+ target_os = buildconfig.substs["OS_TARGET"]
+ for histogram in histograms:
+ kind = histogram.kind()
+ if not histogram.record_on_os(target_os):
+ continue
+
+ if kind not in table:
+ raise Exception(
+ 'Unknown kind "%s" for histogram "%s".' % (kind, histogram.name())
+ )
+ fn = table[kind]
+ fn(output, histogram)
+
+
+def write_histogram_ranges(output, histograms):
+ # This generates static data to avoid costly initialization of histograms
+ # (especially exponential ones which require log and exp calls) at runtime.
+ # The format must exactly match that required in histogram.cc, which is
+ # 0, buckets..., INT_MAX. Additionally, the list ends in a 0 to aid asserts
+    # that validate that the length of the ranges list is correct.
+ print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+ print("const int gHistogramBucketLowerBounds[] = {", file=output)
+ print("#else", file=output)
+ print("constexpr int gHistogramBucketLowerBounds[] = {", file=output)
+ print("#endif", file=output)
+
+ # Print the dummy buckets for expired histograms, and set the offset to match.
+ print("0,1,2,INT_MAX,", file=output)
+ offset = 4
+ ranges_offsets = {}
+
+ for histogram in histograms:
+ ranges = tuple(histogram.ranges())
+ if ranges not in ranges_offsets:
+ ranges_offsets[ranges] = offset
+ # Suffix each ranges listing with INT_MAX, to match histogram.cc's
+ # expected format.
+ offset += len(ranges) + 1
+ print(",".join(map(str, ranges)), ",INT_MAX,", file=output)
+ print("0};", file=output)
+
+ if offset > 32767:
+ raise Exception("Histogram offsets exceeded maximum value for an int16_t.")
+
+ target_os = buildconfig.substs["OS_TARGET"]
+ print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+ print("const int16_t gHistogramBucketLowerBoundIndex[] = {", file=output)
+ print("#else", file=output)
+ print("constexpr int16_t gHistogramBucketLowerBoundIndex[] = {", file=output)
+ print("#endif", file=output)
+ for histogram in histograms:
+ if histogram.record_on_os(target_os):
+ our_offset = ranges_offsets[tuple(histogram.ranges())]
+ print("%d," % our_offset, file=output)
+
+ print("};", file=output)
+
+
+def main(output, *filenames):
+ try:
+ histograms = list(parse_histograms.from_files(filenames))
+ except ParserError as ex:
+ print("\nError processing histograms:\n" + str(ex) + "\n")
+ sys.exit(1)
+
+ print(banner, file=output)
+ write_histogram_table(output, histograms)
+ write_histogram_ranges(output, histograms)
+ write_histogram_static_asserts(output, histograms)
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
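write_histogram_ranges keeps gHistogramBucketLowerBounds small by deduplicating bucket lists: histograms with identical ranges share one offset, and every list is followed by an INT_MAX terminator (hence the len(ranges) + 1). A self-contained sketch of that bookkeeping:

    # Sketch of the ranges deduplication: identical bucket lists share a single
    # offset into the flat lower-bounds array; offsets skip the 4-entry dummy
    # block ("0,1,2,INT_MAX") reserved for expired histograms.
    ranges_offsets = {}
    offset = 4

    for ranges in [(0, 1, 5, 25), (0, 1, 10), (0, 1, 5, 25)]:
        if ranges not in ranges_offsets:
            ranges_offsets[ranges] = offset
            offset += len(ranges) + 1  # +1 for the INT_MAX terminator

    assert ranges_offsets[(0, 1, 5, 25)] == 4  # first list, right after the dummy
    assert ranges_offsets[(0, 1, 10)] == 9     # 4 + 4 bounds + INT_MAX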
diff --git a/toolkit/components/telemetry/build_scripts/gen_histogram_enum.py b/toolkit/components/telemetry/build_scripts/gen_histogram_enum.py
new file mode 100644
index 0000000000..8f224c5140
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_histogram_enum.py
@@ -0,0 +1,147 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out a C++ enum definition whose members are the names of
+# histograms as well as the following other members:
+#
+# - HistogramCount
+# - HistogramFirstUseCounter
+# - HistogramLastUseCounter
+# - HistogramUseCounterCount
+#
+# The histograms are defined in files provided as command-line arguments.
+
+import itertools
+import sys
+
+import buildconfig
+from mozparsers import parse_histograms
+from mozparsers.shared_telemetry_utils import ParserError
+
+banner = """/* This file is auto-generated, see gen_histogram_enum.py. */
+"""
+
+header = """
+#ifndef mozilla_TelemetryHistogramEnums_h
+#define mozilla_TelemetryHistogramEnums_h
+
+#include <cstdint>
+#include <type_traits>
+
+namespace mozilla {
+namespace Telemetry {
+"""
+
+footer = """
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryHistogramEnums_h"""
+
+
+def get_histogram_typename(histogram):
+ name = histogram.name()
+ if name.startswith("USE_COUNTER2_"):
+ return "UseCounterWorker" if name.endswith("_WORKER") else "UseCounter"
+ return None
+
+
+def main(output, *filenames):
+ # Print header.
+ print(banner, file=output)
+ print(header, file=output)
+
+ # Load the histograms.
+ try:
+ all_histograms = list(parse_histograms.from_files(filenames))
+ except ParserError as ex:
+ print("\nError processing histograms:\n" + str(ex) + "\n")
+ sys.exit(1)
+
+ groups = itertools.groupby(all_histograms, get_histogram_typename)
+
+ # Print the histogram enums.
+ # Note that parse_histograms.py guarantees that all of the
+ # USE_COUNTER2_*_WORKER and USE_COUNTER2_* histograms are both defined in a
+ # contiguous block.
+ print("enum HistogramID : uint32_t {", file=output)
+ seen_group_types = {"UseCounter": False, "UseCounterWorker": False}
+ for (group_type, histograms) in groups:
+ if group_type is not None:
+ assert isinstance(group_type, str)
+ assert group_type in seen_group_types.keys()
+ assert not seen_group_types[group_type]
+ seen_group_types[group_type] = True
+ # The Histogram*DUMMY enum variables are used to make the computation
+ # of Histogram{First,Last}* easier. Otherwise, we'd have to special
+ # case the first and last histogram in the group.
+ print(" HistogramFirst%s," % group_type, file=output)
+ print(
+ " Histogram{0}DUMMY1 = HistogramFirst{0} - 1,".format(group_type),
+ file=output,
+ )
+
+ for histogram in histograms:
+ if histogram.record_on_os(buildconfig.substs["OS_TARGET"]):
+ print(" %s," % histogram.name(), file=output)
+
+ if group_type is not None:
+ assert isinstance(group_type, str)
+ print(" Histogram%sDUMMY2," % group_type, file=output)
+ print(
+ " HistogramLast{0} = Histogram{0}DUMMY2 - 1,".format(group_type),
+ file=output,
+ )
+
+ print(" HistogramCount,", file=output)
+
+ for (key, value) in sorted(seen_group_types.items()):
+ if value:
+ print(
+ " Histogram{0}Count = HistogramLast{0} - HistogramFirst{0} + 1,".format(
+ key
+ ),
+ file=output,
+ )
+ else:
+ print(" HistogramFirst%s = 0," % key, file=output)
+ print(" HistogramLast%s = 0," % key, file=output)
+ print(" Histogram%sCount = 0," % key, file=output)
+
+ print("};", file=output)
+
+ # Write categorical label enums.
+ categorical = filter(lambda h: h.kind() == "categorical", all_histograms)
+ categorical = filter(
+ lambda h: h.record_on_os(buildconfig.substs["OS_TARGET"]), categorical
+ )
+ enums = [("LABELS_" + h.name(), h.labels(), h.name()) for h in categorical]
+ for name, labels, _ in enums:
+ print("\nenum class %s : uint32_t {" % name, file=output)
+ print(" %s" % ",\n ".join(labels), file=output)
+ print("};", file=output)
+
+ print(
+ "\ntemplate<class T> struct IsCategoricalLabelEnum : std::false_type {};",
+ file=output,
+ )
+ for name, _, _ in enums:
+ print(
+ "template<> struct IsCategoricalLabelEnum<%s> : std::true_type {};" % name,
+ file=output,
+ )
+
+ print("\ntemplate<class T> struct CategoricalLabelId {};", file=output)
+ for name, _, id in enums:
+ print(
+ "template<> struct CategoricalLabelId<%s> : "
+ "std::integral_constant<uint32_t, %s> {};" % (name, id),
+ file=output,
+ )
+
+ # Footer.
+ print(footer, file=output)
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
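The HistogramID generator depends on itertools.groupby, which only merges adjacent items with equal keys; that is why parse_histograms.py must guarantee the USE_COUNTER2_* histograms form contiguous blocks. A quick illustration:

    # itertools.groupby only groups *adjacent* equal keys, so order matters.
    import itertools

    names = ["A", "USE_COUNTER2_X", "USE_COUNTER2_Y", "B"]
    key = lambda n: "UseCounter" if n.startswith("USE_COUNTER2_") else None
    print([(k, list(g)) for k, g in itertools.groupby(names, key)])
    # -> [(None, ['A']), ('UseCounter', ['USE_COUNTER2_X', 'USE_COUNTER2_Y']), (None, ['B'])]
    # Were the use counters split apart, the same group key would appear twice,
    # tripping the "assert not seen_group_types[group_type]" check above.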
diff --git a/toolkit/components/telemetry/build_scripts/gen_histogram_phf.py b/toolkit/components/telemetry/build_scripts/gen_histogram_phf.py
new file mode 100644
index 0000000000..38c7245506
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_histogram_phf.py
@@ -0,0 +1,73 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import sys
+
+import buildconfig
+from mozparsers import parse_histograms
+from mozparsers.shared_telemetry_utils import ParserError
+from perfecthash import PerfectHash
+
+PHFSIZE = 1024
+
+banner = """/* This file is auto-generated, see gen_histogram_phf.py. */
+"""
+
+header = """
+#ifndef mozilla_TelemetryHistogramNameMap_h
+#define mozilla_TelemetryHistogramNameMap_h
+
+#include "mozilla/PerfectHash.h"
+
+namespace mozilla {
+namespace Telemetry {
+"""
+
+footer = """
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryHistogramNameMap_h
+"""
+
+
+def main(output, *filenames):
+ """
+ Generate a Perfect Hash Table for the Histogram name -> Histogram ID lookup.
+    The table is immutable once generated, so we can avoid dynamic memory allocation.
+ """
+
+ output.write(banner)
+ output.write(header)
+
+ try:
+ histograms = list(parse_histograms.from_files(filenames))
+ histograms = [
+ h for h in histograms if h.record_on_os(buildconfig.substs["OS_TARGET"])
+ ]
+ except ParserError as ex:
+ print("\nError processing histograms:\n" + str(ex) + "\n")
+ sys.exit(1)
+
+ histograms = [
+ (bytearray(hist.name(), "ascii"), idx) for (idx, hist) in enumerate(histograms)
+ ]
+ name_phf = PerfectHash(histograms, PHFSIZE)
+
+ output.write(
+ name_phf.cxx_codegen(
+ name="HistogramIDByNameLookup",
+ entry_type="uint32_t",
+ lower_entry=lambda x: str(x[1]),
+ key_type="const nsACString&",
+ key_bytes="aKey.BeginReading()",
+ key_length="aKey.Length()",
+ )
+ )
+
+ output.write(footer)
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
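On the C++ side the generated lookup relies on mozilla/PerfectHash.h; the Python PerfectHash class searches for per-bucket hash bases so every known key lands in a distinct slot. As a conceptual sketch of that displacement technique in plain Python (not the in-tree PerfectHash implementation, whose hash function and codegen differ; the keys below are illustrative):

    # Conceptual sketch of a displacement-based perfect hash (CHD-style),
    # using standard 32-bit FNV-1a constants.
    def fnv1a(data, basis=0x811C9DC5):
        for b in data:
            basis = ((basis ^ b) * 0x01000193) & 0xFFFFFFFF
        return basis

    def build_phf(entries, size):
        # A first-level hash groups keys into buckets; for each bucket we
        # search for a basis mapping all of its keys to distinct free slots.
        buckets = [[] for _ in range(size)]
        for key, value in entries:
            buckets[fnv1a(key) % size].append((key, value))
        intermediate = [0] * size
        slots = [None] * size
        for i, bucket in sorted(enumerate(buckets), key=lambda kv: -len(kv[1])):
            basis = 1
            while True:
                targets = [fnv1a(k, basis) % size for k, _ in bucket]
                if len(set(targets)) == len(targets) and all(
                    slots[t] is None for t in targets
                ):
                    break
                basis += 1
            intermediate[i] = basis
            for (key, value), t in zip(bucket, targets):
                slots[t] = value

        def lookup(key):
            # Only meaningful for known keys; real callers re-check the name.
            return slots[fnv1a(key, intermediate[fnv1a(key) % size]) % size]

        return lookup

    lookup = build_phf([(b"GC_MS", 0), (b"CYCLE_COLLECTOR", 1)], 8)
    assert lookup(b"CYCLE_COLLECTOR") == 1

The payoff is the one named in the docstring above: lookups are two hashes and two array reads, with no dynamic allocation at runtime.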
diff --git a/toolkit/components/telemetry/build_scripts/gen_process_data.py b/toolkit/components/telemetry/build_scripts/gen_process_data.py
new file mode 100644
index 0000000000..2a494689ad
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_process_data.py
@@ -0,0 +1,80 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out processes data for C++. The processes are defined
+# in a file provided as a command-line argument.
+
+import collections
+import sys
+
+from mozparsers.shared_telemetry_utils import ParserError, load_yaml_file
+
+# The banner/text at the top of the generated file.
+banner = """/* This file is auto-generated from Telemetry build scripts,
+ see gen_process_data.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryProcessData_h
+#define mozilla_TelemetryProcessData_h
+
+#include "mozilla/TelemetryProcessEnums.h"
+
+namespace mozilla {
+namespace Telemetry {
+"""
+
+file_footer = """
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryProcessData_h"""
+
+
+def to_enum_label(name):
+ return name.title().replace("_", "")
+
+
+def write_processes_data(processes, output):
+ def p(line):
+ print(line, file=output)
+
+ processes = collections.OrderedDict(processes)
+
+ p("static GeckoProcessType ProcessIDToGeckoProcessType[%d] = {" % len(processes))
+ for i, (name, value) in enumerate(sorted(processes.items())):
+ p(
+ " /* %d: ProcessID::%s = */ %s,"
+ % (i, to_enum_label(name), value["gecko_enum"])
+ )
+ p("};")
+ p("")
+ p("#if defined(_MSC_VER) && !defined(__clang__)")
+ p("static const char* const ProcessIDToString[%d] = {" % len(processes))
+ p("#else")
+ p("static constexpr const char* ProcessIDToString[%d] = {" % len(processes))
+ p("#endif")
+ for i, (name, value) in enumerate(sorted(processes.items())):
+ p(' /* %d: ProcessID::%s = */ "%s",' % (i, to_enum_label(name), name))
+ p("};")
+
+
+def main(output, *filenames):
+ if len(filenames) > 1:
+ raise Exception("We don't support loading from more than one file.")
+
+ try:
+ processes = load_yaml_file(filenames[0])
+
+ # Write the process data file.
+ print(banner, file=output)
+ print(file_header, file=output)
+ write_processes_data(processes, output)
+ print(file_footer, file=output)
+ except ParserError as ex:
+ print("\nError generating processes data:\n" + str(ex) + "\n")
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
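to_enum_label turns the snake_case process names from the YAML registry into CamelCase C++ enum labels via str.title(). A quick check of the behaviour (the second name is illustrative):

    # str.title() capitalizes each underscore-separated word; stripping the
    # underscores then yields the CamelCase enum label.
    def to_enum_label(name):
        return name.title().replace("_", "")

    assert to_enum_label("main") == "Main"
    assert to_enum_label("gpu_process") == "GpuProcess"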
diff --git a/toolkit/components/telemetry/build_scripts/gen_process_enum.py b/toolkit/components/telemetry/build_scripts/gen_process_enum.py
new file mode 100644
index 0000000000..bfe2d65e43
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_process_enum.py
@@ -0,0 +1,69 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out processes data for C++. The processes are defined
+# in a file provided as a command-line argument.
+
+import collections
+import sys
+
+from mozparsers.shared_telemetry_utils import ParserError, load_yaml_file
+
+# The banner/text at the top of the generated file.
+banner = """/* This file is auto-generated from Telemetry build scripts,
+ see gen_process_enum.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryProcessEnums_h
+#define mozilla_TelemetryProcessEnums_h
+
+#include <cstdint>
+
+namespace mozilla {
+namespace Telemetry {
+"""
+
+file_footer = """
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryProcessEnums_h"""
+
+
+def to_enum_label(name):
+ return name.title().replace("_", "")
+
+
+def write_processes_enum(processes, output):
+ def p(line):
+ print(line, file=output)
+
+ processes = collections.OrderedDict(processes)
+
+ p("enum class ProcessID : uint32_t {")
+ for i, (name, _) in enumerate(sorted(processes.items())):
+ p(" %s = %d," % (to_enum_label(name), i))
+ p(" Count = %d" % len(processes))
+ p("};")
+
+
+def main(output, *filenames):
+ if len(filenames) > 1:
+ raise Exception("We don't support loading from more than one file.")
+
+ try:
+ processes = load_yaml_file(filenames[0])
+
+ # Write the process data file.
+ print(banner, file=output)
+ print(file_header, file=output)
+ write_processes_enum(processes, output)
+ print(file_footer, file=output)
+ except ParserError as ex:
+ print("\nError generating processes enums:\n" + str(ex) + "\n")
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_scalar_data.py b/toolkit/components/telemetry/build_scripts/gen_scalar_data.py
new file mode 100644
index 0000000000..6ef1f457b5
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_scalar_data.py
@@ -0,0 +1,216 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out scalar information for C++. The scalars are defined
+# in a file provided as a command-line argument.
+
+import json
+import sys
+from collections import OrderedDict
+from os import path
+
+import buildconfig
+from mozparsers import parse_scalars
+from mozparsers.shared_telemetry_utils import ParserError, static_assert
+
+COMPONENTS_PATH = path.abspath(
+ path.join(path.dirname(__file__), path.pardir, path.pardir)
+)
+sys.path.append(
+ path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext")
+)
+from string_table import StringTable
+
+# The banner/text at the top of the generated file.
+banner = """/* This file is auto-generated, only for internal use in TelemetryScalar.h,
+ see gen_scalar_data.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryScalarData_h
+#define mozilla_TelemetryScalarData_h
+#include "core/ScalarInfo.h"
+#include "nsITelemetry.h"
+namespace {
+"""
+
+file_footer = """\
+} // namespace
+#endif // mozilla_TelemetryScalarData_h
+"""
+
+
+def write_scalar_info(
+ scalar,
+ output,
+ name_index,
+ expiration_index,
+ store_index,
+ store_count,
+ key_count,
+ key_index,
+):
+ """Writes a scalar entry to the output file.
+
+ :param scalar: a ScalarType instance describing the scalar.
+ :param output: the output stream.
+ :param name_index: the index of the scalar name in the strings table.
+ :param expiration_index: the index of the expiration version in the strings table.
+ """
+ if scalar.record_on_os(buildconfig.substs["OS_TARGET"]):
+ print(
+ " {{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }},".format(
+ scalar.nsITelemetry_kind,
+ name_index,
+ expiration_index,
+ scalar.dataset,
+ " | ".join(scalar.record_in_processes_enum),
+ "true" if scalar.keyed else "false",
+ key_count,
+ key_index,
+ " | ".join(scalar.products_enum),
+ store_count,
+ store_index,
+ ),
+ file=output,
+ )
+
+
+def write_scalar_tables(scalars, output):
+ """Writes the scalar and strings tables to an header file.
+
+ :param scalars: a list of ScalarType instances describing the scalars.
+ :param output: the output stream.
+ """
+ string_table = StringTable()
+
+ store_table = []
+ total_store_count = 0
+
+ keys_table = []
+ total_key_count = 0
+
+ print("const ScalarInfo gScalars[] = {", file=output)
+ for s in scalars:
+ # We add both the scalar label and the expiration string to the strings
+ # table.
+ name_index = string_table.stringIndex(s.label)
+ exp_index = string_table.stringIndex(s.expires)
+
+ stores = s.record_into_store
+ store_index = 0
+ if stores == ["main"]:
+ # if count == 1 && offset == UINT16_MAX -> only main store
+ store_index = "UINT16_MAX"
+ else:
+ store_index = total_store_count
+ store_table.append((s.label, string_table.stringIndexes(stores)))
+ total_store_count += len(stores)
+
+ keys = s.keys
+ key_index = 0
+ if len(keys) > 0:
+ key_index = total_key_count
+ keys_table.append((s.label, string_table.stringIndexes(keys)))
+ total_key_count += len(keys)
+
+ # Write the scalar info entry.
+ write_scalar_info(
+ s,
+ output,
+ name_index,
+ exp_index,
+ store_index,
+ len(stores),
+ len(keys),
+ key_index,
+ )
+ print("};", file=output)
+
+ string_table_name = "gScalarsStringTable"
+ string_table.writeDefinition(output, string_table_name)
+ static_assert(
+ output, "sizeof(%s) <= UINT32_MAX" % string_table_name, "index overflow"
+ )
+
+ print("\nconstexpr uint32_t gScalarKeysTable[] = {", file=output)
+ for name, indexes in keys_table:
+ print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output)
+ print("};", file=output)
+
+ store_table_name = "gScalarStoresTable"
+ print("\n#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+ print("const uint32_t {}[] = {{".format(store_table_name), file=output)
+ print("#else", file=output)
+ print("constexpr uint32_t {}[] = {{".format(store_table_name), file=output)
+ print("#endif", file=output)
+ for name, indexes in store_table:
+ print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output)
+ print("};", file=output)
+ static_assert(
+ output, "sizeof(%s) <= UINT16_MAX" % store_table_name, "index overflow"
+ )
+
+
+def parse_scalar_definitions(filenames):
+ scalars = []
+ for filename in filenames:
+ try:
+ batch = parse_scalars.load_scalars(filename)
+ scalars.extend(batch)
+ except ParserError as ex:
+ print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+ sys.exit(1)
+ return scalars
+
+
+def generate_JSON_definitions(output, *filenames):
+ """Write the scalar definitions to a JSON file.
+
+ :param output: the file to write the content to.
+ :param filenames: a list of filenames provided by the build system.
+ We only support a single file.
+ """
+ scalars = parse_scalar_definitions(filenames)
+
+ scalar_definitions = OrderedDict()
+ for scalar in scalars:
+ category = scalar.category
+
+ if category not in scalar_definitions:
+ scalar_definitions[category] = OrderedDict()
+
+ scalar_definitions[category][scalar.name] = OrderedDict(
+ {
+ "kind": scalar.nsITelemetry_kind,
+ "keyed": scalar.keyed,
+ "keys": scalar.keys,
+ "record_on_release": True
+ if scalar.dataset_short == "opt-out"
+ else False,
+ # We don't expire dynamic-builtin scalars: they're only meant for
+ # use in local developer builds anyway. They will expire when rebuilding.
+ "expired": False,
+ "stores": scalar.record_into_store,
+ "expires": scalar.expires,
+ "products": scalar.products,
+ }
+ )
+
+ json.dump(scalar_definitions, output)
+
+
+def main(output, *filenames):
+ # Load the scalars first.
+ scalars = parse_scalar_definitions(filenames)
+
+ # Write the scalar data file.
+ print(banner, file=output)
+ print(file_header, file=output)
+ write_scalar_tables(scalars, output)
+ print(file_footer, file=output)
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
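Like the histogram generator above, write_scalar_tables folds the common case of recording only into the "main" store into a UINT16_MAX sentinel, so those probes occupy no room in gScalarStoresTable. A self-contained sketch of that bookkeeping (scalar labels are illustrative):

    # Sketch of the store-index encoding: count == 1 with offset == UINT16_MAX
    # means "main store only", keeping the common case out of the table.
    store_table = []
    total_store_count = 0

    for label, stores in [("a.scalar", ["main"]), ("b.scalar", ["main", "sync"])]:
        if stores == ["main"]:
            store_index = "UINT16_MAX"
        else:
            store_index = total_store_count
            store_table.append((label, stores))
            total_store_count += len(stores)
        print(label, store_index, len(stores))
    # a.scalar UINT16_MAX 1
    # b.scalar 0 2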
diff --git a/toolkit/components/telemetry/build_scripts/gen_scalar_enum.py b/toolkit/components/telemetry/build_scripts/gen_scalar_enum.py
new file mode 100644
index 0000000000..321cd047d7
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_scalar_enum.py
@@ -0,0 +1,60 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out a C++ enum definition whose members are the names of
+# scalar types.
+#
+# The scalars are defined in files provided as command-line arguments.
+
+import sys
+
+import buildconfig
+from mozparsers import parse_scalars
+from mozparsers.shared_telemetry_utils import ParserError
+
+banner = """/* This file is auto-generated, see gen_scalar_enum.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryScalarEnums_h
+#define mozilla_TelemetryScalarEnums_h
+namespace mozilla {
+namespace Telemetry {
+enum class ScalarID : uint32_t {\
+"""
+
+file_footer = """\
+};
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryScalarEnums_h
+"""
+
+
+def main(output, *filenames):
+ # Load the scalars first.
+ scalars = []
+ for filename in filenames:
+ try:
+ batch = parse_scalars.load_scalars(filename)
+ scalars.extend(batch)
+ except ParserError as ex:
+ print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+ sys.exit(1)
+
+ # Write the enum file.
+ print(banner, file=output)
+ print(file_header, file=output)
+
+ for s in scalars:
+ if s.record_on_os(buildconfig.substs["OS_TARGET"]):
+ print(" %s," % s.enum_label, file=output)
+
+ print(" ScalarCount,", file=output)
+
+ print(file_footer, file=output)
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_userinteraction_data.py b/toolkit/components/telemetry/build_scripts/gen_userinteraction_data.py
new file mode 100644
index 0000000000..b12cbde239
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_userinteraction_data.py
@@ -0,0 +1,105 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out UserInteraction information for C++. The UserInteractions are
+# defined in a file provided as a command-line argument.
+
+import sys
+from os import path
+
+from mozparsers import parse_user_interactions
+from mozparsers.shared_telemetry_utils import ParserError, static_assert
+
+COMPONENTS_PATH = path.abspath(
+ path.join(path.dirname(__file__), path.pardir, path.pardir)
+)
+sys.path.append(
+ path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext")
+)
+from string_table import StringTable
+
+# The banner/text at the top of the generated file.
+banner = """/* This file is auto-generated, only for internal use in
+ TelemetryUserInteraction.h, see gen_userinteraction_data.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryUserInteractionData_h
+#define mozilla_TelemetryUserInteractionData_h
+#include "core/UserInteractionInfo.h"
+"""
+
+file_footer = """\
+#endif // mozilla_TelemetryUserInteractionData_h
+"""
+
+
+def write_user_interaction_table(user_interactions, output, string_table):
+ head = """
+ namespace mozilla {
+ namespace Telemetry {
+ namespace UserInteractionID {
+ const static uint32_t UserInteractionCount = %d;
+ } // namespace UserInteractionID
+ } // namespace Telemetry
+ } // namespace mozilla
+ """
+
+ print(head % len(user_interactions), file=output)
+
+ print("namespace {", file=output)
+
+ table_name = "gUserInteractions"
+ print("constexpr UserInteractionInfo %s[] = {" % table_name, file=output)
+
+ for u in user_interactions:
+ name_index = string_table.stringIndex(u.label)
+ print(" UserInteractionInfo({}),".format(name_index), file=output)
+ print("};", file=output)
+
+ static_assert(
+ output,
+ "sizeof(%s) <= UINT32_MAX" % table_name,
+ "index overflow of UserInteractionInfo table %s" % table_name,
+ )
+
+ print("} // namespace", file=output)
+
+
+def main(output, *filenames):
+ # Load the UserInteraction data.
+ user_interactions = []
+ for filename in filenames:
+ try:
+ batch = parse_user_interactions.load_user_interactions(filename)
+ user_interactions.extend(batch)
+ except ParserError as ex:
+ print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+ sys.exit(1)
+
+ # Write the scalar data file.
+ print(banner, file=output)
+ print(file_header, file=output)
+
+ string_table = StringTable()
+
+ # Write the data for individual UserInteractions.
+ write_user_interaction_table(user_interactions, output, string_table)
+ print("", file=output)
+
+ # Write the string table.
+ string_table_name = "gUserInteractionsStringTable"
+ string_table.writeDefinition(output, string_table_name)
+ static_assert(
+ output, "sizeof(%s) <= UINT32_MAX" % string_table_name, "index overflow"
+ )
+ print("", file=output)
+
+ print(file_footer, file=output)
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_userinteraction_phf.py b/toolkit/components/telemetry/build_scripts/gen_userinteraction_phf.py
new file mode 100644
index 0000000000..f1c7256414
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_userinteraction_phf.py
@@ -0,0 +1,70 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import sys
+
+from mozparsers import parse_user_interactions
+from mozparsers.shared_telemetry_utils import ParserError
+from perfecthash import PerfectHash
+
+PHFSIZE = 1024
+
+banner = """/* This file is auto-generated, see gen_userinteraction_phf.py. */
+"""
+
+header = """
+#ifndef mozilla_TelemetryUserInteractionNameMap_h
+#define mozilla_TelemetryUserInteractionNameMap_h
+
+#include "mozilla/PerfectHash.h"
+
+namespace mozilla {
+namespace Telemetry {
+"""
+
+footer = """
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryUserInteractionNameMap_h
+"""
+
+
+def main(output, *filenames):
+ """
+ Generate a Perfect Hash Table for the UserInteraction name -> UserInteraction ID lookup.
+    The table is immutable once generated, so we can avoid dynamic memory allocation.
+ """
+
+ output.write(banner)
+ output.write(header)
+
+ try:
+ user_interactions = list(parse_user_interactions.from_files(filenames))
+ except ParserError as ex:
+ print("\nError processing UserInteractions:\n" + str(ex) + "\n")
+ sys.exit(1)
+
+ user_interactions = [
+ (bytearray(ui.label, "ascii"), idx)
+ for (idx, ui) in enumerate(user_interactions)
+ ]
+ name_phf = PerfectHash(user_interactions, PHFSIZE)
+
+ output.write(
+ name_phf.cxx_codegen(
+ name="UserInteractionIDByNameLookup",
+ entry_type="uint32_t",
+ lower_entry=lambda x: str(x[1]),
+ key_type="const nsACString&",
+ key_bytes="aKey.BeginReading()",
+ key_length="aKey.Length()",
+ )
+ )
+
+ output.write(footer)
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/__init__.py b/toolkit/components/telemetry/build_scripts/mozparsers/__init__.py
new file mode 100644
index 0000000000..c580d191c1
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/__init__.py
@@ -0,0 +1,3 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/parse_events.py b/toolkit/components/telemetry/build_scripts/mozparsers/parse_events.py
new file mode 100644
index 0000000000..09ed651917
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/parse_events.py
@@ -0,0 +1,477 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import atexit
+import itertools
+import re
+import string
+
+import yaml
+
+from . import shared_telemetry_utils as utils
+from .shared_telemetry_utils import ParserError
+
+atexit.register(ParserError.exit_func)
+
+MAX_CATEGORY_NAME_LENGTH = 30
+MAX_METHOD_NAME_LENGTH = 20
+MAX_OBJECT_NAME_LENGTH = 20
+MAX_EXTRA_KEYS_COUNT = 10
+MAX_EXTRA_KEY_NAME_LENGTH = 15
+
+IDENTIFIER_PATTERN = r"^[a-zA-Z][a-zA-Z0-9_.]*[a-zA-Z0-9]$"
+
+
+def nice_type_name(t):
+ if issubclass(t, str):
+ return "string"
+ return t.__name__
+
+
+def convert_to_cpp_identifier(s, sep):
+ return string.capwords(s, sep).replace(sep, "")
+
+
+class OneOf:
+ """This is a placeholder type for the TypeChecker below.
+ It signals that the checked value should match one of the following arguments
+ passed to the TypeChecker constructor.
+ """
+
+ pass
+
+
+class AtomicTypeChecker:
+ """Validate a simple value against a given type"""
+
+ def __init__(self, instance_type):
+ self.instance_type = instance_type
+
+ def check(self, identifier, key, value):
+ if not isinstance(value, self.instance_type):
+ ParserError(
+ "%s: Failed type check for %s - expected %s, got %s."
+ % (
+ identifier,
+ key,
+ nice_type_name(self.instance_type),
+ nice_type_name(type(value)),
+ )
+ ).handle_later()
+
+
+class MultiTypeChecker:
+ """Validate a simple value against a list of possible types"""
+
+ def __init__(self, *instance_types):
+ if not instance_types:
+ raise Exception("At least one instance type is required.")
+ self.instance_types = instance_types
+
+ def check(self, identifier, key, value):
+ if not any(isinstance(value, i) for i in self.instance_types):
+ ParserError(
+ "%s: Failed type check for %s - got %s, expected one of:\n%s"
+ % (
+ identifier,
+ key,
+ nice_type_name(type(value)),
+ " or ".join(map(nice_type_name, self.instance_types)),
+ )
+ ).handle_later()
+
+
+class ListTypeChecker:
+ """Validate a list of values against a given type"""
+
+ def __init__(self, instance_type):
+ self.instance_type = instance_type
+
+ def check(self, identifier, key, value):
+ if len(value) < 1:
+ ParserError(
+ "%s: Failed check for %s - list should not be empty."
+ % (identifier, key)
+ ).handle_now()
+
+ for x in value:
+ if not isinstance(x, self.instance_type):
+ ParserError(
+ "%s: Failed type check for %s - expected list value type %s, got"
+ " %s."
+ % (
+ identifier,
+ key,
+ nice_type_name(self.instance_type),
+ nice_type_name(type(x)),
+ )
+ ).handle_later()
+
+
+class DictTypeChecker:
+ """Validate keys and values of a dict against a given type"""
+
+ def __init__(self, keys_instance_type, values_instance_type):
+ self.keys_instance_type = keys_instance_type
+ self.values_instance_type = values_instance_type
+
+ def check(self, identifier, key, value):
+ if len(value.keys()) < 1:
+ ParserError(
+ "%s: Failed check for %s - dict should not be empty."
+ % (identifier, key)
+ ).handle_now()
+ for x in value.keys():
+ if not isinstance(x, self.keys_instance_type):
+ ParserError(
+ "%s: Failed dict type check for %s - expected key type %s, got "
+ "%s."
+ % (
+ identifier,
+ key,
+ nice_type_name(self.keys_instance_type),
+ nice_type_name(type(x)),
+ )
+ ).handle_later()
+ for k, v in value.items():
+ if not isinstance(v, self.values_instance_type):
+ ParserError(
+ "%s: Failed dict type check for %s - "
+ "expected value type %s for key %s, got %s."
+ % (
+ identifier,
+ key,
+ nice_type_name(self.values_instance_type),
+ k,
+ nice_type_name(type(v)),
+ )
+ ).handle_later()
+
+
+def type_check_event_fields(identifier, name, definition):
+ """Perform a type/schema check on the event definition."""
+ REQUIRED_FIELDS = {
+ "objects": ListTypeChecker(str),
+ "bug_numbers": ListTypeChecker(int),
+ "notification_emails": ListTypeChecker(str),
+ "record_in_processes": ListTypeChecker(str),
+ "description": AtomicTypeChecker(str),
+ "products": ListTypeChecker(str),
+ }
+ OPTIONAL_FIELDS = {
+ "methods": ListTypeChecker(str),
+ "release_channel_collection": AtomicTypeChecker(str),
+ "expiry_version": AtomicTypeChecker(str),
+ "extra_keys": DictTypeChecker(str, str),
+ "operating_systems": ListTypeChecker(str),
+ }
+ ALL_FIELDS = REQUIRED_FIELDS.copy()
+ ALL_FIELDS.update(OPTIONAL_FIELDS)
+
+ # Check that all the required fields are available.
+ missing_fields = [f for f in REQUIRED_FIELDS.keys() if f not in definition]
+ if len(missing_fields) > 0:
+ ParserError(
+ identifier + ": Missing required fields: " + ", ".join(missing_fields)
+ ).handle_now()
+
+ # Is there any unknown field?
+ unknown_fields = [f for f in definition.keys() if f not in ALL_FIELDS]
+ if len(unknown_fields) > 0:
+ ParserError(
+ identifier + ": Unknown fields: " + ", ".join(unknown_fields)
+ ).handle_later()
+
+ # Type-check fields.
+ for k, v in definition.items():
+ ALL_FIELDS[k].check(identifier, k, v)
+
+
+def string_check(identifier, field, value, min_length=1, max_length=None, regex=None):
+ # Length check.
+ if len(value) < min_length:
+ ParserError(
+ "%s: Value '%s' for field %s is less than minimum length of %d."
+ % (identifier, value, field, min_length)
+ ).handle_later()
+ if max_length and len(value) > max_length:
+ ParserError(
+ "%s: Value '%s' for field %s is greater than maximum length of %d."
+ % (identifier, value, field, max_length)
+ ).handle_later()
+ # Regex check.
+ if regex and not re.match(regex, value):
+ ParserError(
+ '%s: String value "%s" for %s is not matching pattern "%s".'
+ % (identifier, value, field, regex)
+ ).handle_later()
+
+
+class EventData:
+ """A class representing one event."""
+
+ def __init__(self, category, name, definition, strict_type_checks=False):
+ self._category = category
+ self._name = name
+ self._definition = definition
+ self._strict_type_checks = strict_type_checks
+
+ type_check_event_fields(self.identifier, name, definition)
+
+ # Check method & object string patterns.
+ if strict_type_checks:
+ for method in self.methods:
+ string_check(
+ self.identifier,
+ field="methods",
+ value=method,
+ min_length=1,
+ max_length=MAX_METHOD_NAME_LENGTH,
+ regex=IDENTIFIER_PATTERN,
+ )
+ for obj in self.objects:
+ string_check(
+ self.identifier,
+ field="objects",
+ value=obj,
+ min_length=1,
+ max_length=MAX_OBJECT_NAME_LENGTH,
+ regex=IDENTIFIER_PATTERN,
+ )
+
+ # Check release_channel_collection
+ rcc_key = "release_channel_collection"
+ rcc = definition.get(rcc_key, "opt-in")
+ allowed_rcc = ["opt-in", "opt-out"]
+ if rcc not in allowed_rcc:
+ ParserError(
+ "%s: Value for %s should be one of: %s"
+ % (self.identifier, rcc_key, ", ".join(allowed_rcc))
+ ).handle_later()
+
+ # Check record_in_processes.
+ record_in_processes = definition.get("record_in_processes")
+ for proc in record_in_processes:
+ if not utils.is_valid_process_name(proc):
+ ParserError(
+ self.identifier + ": Unknown value in record_in_processes: " + proc
+ ).handle_later()
+
+ # Check products.
+ products = definition.get("products")
+ for product in products:
+ if not utils.is_valid_product(product) and self._strict_type_checks:
+ ParserError(
+ self.identifier + ": Unknown value in products: " + product
+ ).handle_later()
+ if utils.is_geckoview_streaming_product(product):
+ ParserError(
+ "{}: Product `{}` unsupported for Event Telemetry".format(
+ self.identifier, product
+ )
+ ).handle_later()
+
+ # Check operating_systems.
+ operating_systems = definition.get("operating_systems", [])
+ for operating_system in operating_systems:
+ if not utils.is_valid_os(operating_system):
+ ParserError(
+ self.identifier
+ + ": Unknown value in operating_systems: "
+ + operating_system
+ ).handle_later()
+
+ # Check extra_keys.
+ extra_keys = definition.get("extra_keys", {})
+ if len(extra_keys.keys()) > MAX_EXTRA_KEYS_COUNT:
+ ParserError(
+ "%s: Number of extra_keys exceeds limit %d."
+ % (self.identifier, MAX_EXTRA_KEYS_COUNT)
+ ).handle_later()
+ for key in extra_keys.keys():
+ string_check(
+ self.identifier,
+ field="extra_keys",
+ value=key,
+ min_length=1,
+ max_length=MAX_EXTRA_KEY_NAME_LENGTH,
+ regex=IDENTIFIER_PATTERN,
+ )
+
+ # Check expiry.
+ if "expiry_version" not in definition:
+ ParserError(
+ "%s: event is missing required field expiry_version" % (self.identifier)
+ ).handle_later()
+
+ # Finish setup.
+        # Historical versions of Events.yaml may contain expiration versions
+        # using the deprecated format 'N.Na1'. Callers parsing such files set
+        # strict_type_checks to False.
+ expiry_version = definition.get("expiry_version", "never")
+ if (
+ not utils.validate_expiration_version(expiry_version)
+ and self._strict_type_checks
+ ):
+ ParserError(
+ "{}: invalid expiry_version: {}.".format(
+ self.identifier, expiry_version
+ )
+ ).handle_now()
+ definition["expiry_version"] = utils.add_expiration_postfix(expiry_version)
+
+ @property
+ def category(self):
+ return self._category
+
+ @property
+ def category_cpp(self):
+ # Transform e.g. category.example into CategoryExample.
+ return convert_to_cpp_identifier(self._category, ".")
+
+ @property
+ def name(self):
+ return self._name
+
+ @property
+ def identifier(self):
+ return self.category + "#" + self.name
+
+ @property
+ def methods(self):
+ return self._definition.get("methods", [self.name])
+
+ @property
+ def objects(self):
+ return self._definition.get("objects")
+
+ @property
+ def record_in_processes(self):
+ return self._definition.get("record_in_processes")
+
+ @property
+ def record_in_processes_enum(self):
+ """Get the non-empty list of flags representing the processes to record data in"""
+ return [utils.process_name_to_enum(p) for p in self.record_in_processes]
+
+ @property
+ def products(self):
+ """Get the non-empty list of products to record data on"""
+ return self._definition.get("products")
+
+ @property
+ def products_enum(self):
+ """Get the non-empty list of flags representing products to record data on"""
+ return [utils.product_name_to_enum(p) for p in self.products]
+
+ @property
+ def expiry_version(self):
+ return self._definition.get("expiry_version")
+
+ @property
+ def operating_systems(self):
+ """Get the list of operating systems to record data on"""
+ return self._definition.get("operating_systems", ["all"])
+
+ def record_on_os(self, target_os):
+ """Check if this probe should be recorded on the passed os."""
+ os = self.operating_systems
+ if "all" in os:
+ return True
+
+ canonical_os = utils.canonical_os(target_os)
+
+ if "unix" in os and canonical_os in utils.UNIX_LIKE_OS:
+ return True
+
+ return canonical_os in os
+
+ @property
+ def enum_labels(self):
+ def enum(method_name, object_name):
+ m = convert_to_cpp_identifier(method_name, "_")
+ o = convert_to_cpp_identifier(object_name, "_")
+ return m + "_" + o
+
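+        # Hand-traced illustration (hypothetical values, not from any real
+        # Events.yaml): methods=["save_as"], objects=["tab"] would yield
+        # ["SaveAs_Tab"], since convert_to_cpp_identifier capitalizes each
+        # "_"-separated part (compare category_cpp above).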
+ combinations = itertools.product(self.methods, self.objects)
+ return [enum(t[0], t[1]) for t in combinations]
+
+ @property
+ def dataset(self):
+ """Get the nsITelemetry constant equivalent for release_channel_collection."""
+ rcc = self.dataset_short
+ if rcc == "opt-out":
+ return "nsITelemetry::DATASET_ALL_CHANNELS"
+ return "nsITelemetry::DATASET_PRERELEASE_CHANNELS"
+
+ @property
+ def dataset_short(self):
+ """Get the short name of the chosen release channel collection policy for the event."""
+ # The collection policy is optional, but we still define a default
+ # behaviour for it.
+ return self._definition.get("release_channel_collection", "opt-in")
+
+ @property
+ def extra_keys(self):
+ return list(sorted(self._definition.get("extra_keys", {}).keys()))
+
+
+def load_events(filename, strict_type_checks):
+ """Parses a YAML file containing the event definitions.
+
+ :param filename: the YAML file containing the event definitions.
+    :param strict_type_checks: a boolean indicating whether to use the stricter type checks.
+ :raises ParserError: if the event file cannot be opened or parsed.
+ """
+
+ # Parse the event definitions from the YAML file.
+ events = None
+ try:
+ with open(filename, "r") as f:
+ events = yaml.safe_load(f)
+ except IOError as e:
+ ParserError("Error opening " + filename + ": " + str(e) + ".").handle_now()
+    except yaml.YAMLError as e:
+ ParserError(
+ "Error parsing events in " + filename + ": " + str(e) + "."
+ ).handle_now()
+
+ event_list = []
+
+ # Events are defined in a fixed two-level hierarchy within the definition file.
+ # The first level contains the category (group name), while the second level contains
+ # the event names and definitions, e.g.:
+ # category.name:
+ # event_name:
+ # <event definition>
+ # ...
+ # ...
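+    #
+    # For illustration only (all field values are hypothetical, not taken from
+    # any real Events.yaml), a minimal entry consistent with the checks in
+    # EventData above might look like:
+    #
+    #   navigation:
+    #     search:
+    #       objects: ["about_home", "urlbar"]
+    #       methods: ["search"]
+    #       release_channel_collection: opt-out
+    #       record_in_processes: ["main"]
+    #       products: ["firefox"]
+    #       expiry_version: never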
+ for category_name, category in sorted(events.items()):
+ string_check(
+ "top level structure",
+ field="category",
+ value=category_name,
+ min_length=1,
+ max_length=MAX_CATEGORY_NAME_LENGTH,
+ regex=IDENTIFIER_PATTERN,
+ )
+
+ # Make sure that the category has at least one entry in it.
+ if not category or len(category) == 0:
+ ParserError(
+ "Category " + category_name + " must contain at least one entry."
+ ).handle_now()
+
+ for name, entry in sorted(category.items()):
+ string_check(
+ category_name,
+ field="event name",
+ value=name,
+ min_length=1,
+ max_length=MAX_METHOD_NAME_LENGTH,
+ regex=IDENTIFIER_PATTERN,
+ )
+ event_list.append(EventData(category_name, name, entry, strict_type_checks))
+
+ return event_list
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py b/toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py
new file mode 100644
index 0000000000..747d872ff0
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py
@@ -0,0 +1,1009 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import atexit
+import collections
+import itertools
+import json
+import math
+import os
+import re
+import runpy
+import sys
+from collections import OrderedDict
+from ctypes import c_int
+
+from . import shared_telemetry_utils as utils
+from .shared_telemetry_utils import ParserError
+
+atexit.register(ParserError.exit_func)
+
+# Constants.
+MAX_LABEL_LENGTH = 20
+MAX_LABEL_COUNT = 100
+MAX_KEY_COUNT = 30
+MAX_KEY_LENGTH = 20
+MIN_CATEGORICAL_BUCKET_COUNT = 50
+CPP_IDENTIFIER_PATTERN = "^[a-z][a-z0-9_]+[a-z0-9]$"
+
+ALWAYS_ALLOWED_KEYS = [
+ "kind",
+ "description",
+ "operating_systems",
+ "expires_in_version",
+ "alert_emails",
+ "keyed",
+ "releaseChannelCollection",
+ "bug_numbers",
+ "keys",
+ "record_in_processes",
+ "record_into_store",
+ "products",
+]
+
+BASE_DOC_URL = (
+ "https://firefox-source-docs.mozilla.org/toolkit/components/" "telemetry/telemetry/"
+)
+HISTOGRAMS_DOC_URL = BASE_DOC_URL + "collection/histograms.html"
+SCALARS_DOC_URL = BASE_DOC_URL + "collection/scalars.html"
+
+GECKOVIEW_STREAMING_SUPPORTED_KINDS = [
+ "linear",
+ "exponential",
+ "categorical",
+]
+
+# parse_histograms.py is used by scripts from a mozilla-central build tree
+# and also by outside consumers, such as the telemetry server. We need
+# to ensure that importing things works in both contexts. Therefore,
+# unconditionally importing things that are local to the build tree, such
+# as buildconfig, is a no-no.
+try:
+ import buildconfig
+
+ # Need to update sys.path to be able to find usecounters.
+ sys.path.append(os.path.join(buildconfig.topsrcdir, "dom/base/"))
+except ImportError:
+ # Must be in an out-of-tree usage scenario. Trust that whoever is
+ # running this script knows we need the usecounters module and has
+ # ensured it's in our sys.path.
+ pass
+
+
+def linear_buckets(dmin, dmax, n_buckets):
+ ret_array = [0] * n_buckets
+ dmin = float(dmin)
+ dmax = float(dmax)
+ for i in range(1, n_buckets):
+ linear_range = (dmin * (n_buckets - 1 - i) + dmax * (i - 1)) / (n_buckets - 2)
+ ret_array[i] = int(linear_range + 0.5)
+ return ret_array
+
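+# Worked example (hand-computed from the formula above, for illustration):
+# linear_buckets(1, 10, 5) -> [0, 1, 4, 7, 10].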
+
+def exponential_buckets(dmin, dmax, n_buckets):
+ log_max = math.log(dmax)
+ bucket_index = 2
+ ret_array = [0] * n_buckets
+ current = dmin
+ ret_array[1] = current
+ for bucket_index in range(2, n_buckets):
+ log_current = math.log(current)
+ log_ratio = (log_max - log_current) / (n_buckets - bucket_index)
+ log_next = log_current + log_ratio
+ next_value = int(math.floor(math.exp(log_next) + 0.5))
+ if next_value > current:
+ current = next_value
+ else:
+ current = current + 1
+ ret_array[bucket_index] = current
+ return ret_array
+
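+# Worked example (hand-computed, for illustration): exponential_buckets(1, 100, 5)
+# -> [0, 1, 5, 22, 100]; successive buckets grow by a roughly constant ratio.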
+
+allowlists = None
+
+
+def load_allowlist():
+ global allowlists
+ try:
+ parsers_path = os.path.realpath(os.path.dirname(__file__))
+ # The parsers live in build_scripts/parsers in the Telemetry module, while
+ # the histogram-allowlists file lives in the root of the module. Account
+ # for that when looking for the allowlist.
+ # NOTE: if the parsers are moved, this logic will need to be updated.
+ telemetry_module_path = os.path.abspath(
+ os.path.join(parsers_path, os.pardir, os.pardir)
+ )
+ allowlist_path = os.path.join(
+ telemetry_module_path, "histogram-allowlists.json"
+ )
+ with open(allowlist_path, "r") as f:
+ try:
+ allowlists = json.load(f)
+ for name, allowlist in allowlists.items():
+ allowlists[name] = set(allowlist)
+ except ValueError:
+ ParserError("Error parsing allowlist: %s" % allowlist_path).handle_now()
+ except IOError:
+ allowlists = None
+        ParserError("Unable to open allowlist: %s." % allowlist_path).handle_now()
+
+
+class Histogram:
+ """A class for representing a histogram definition."""
+
+ def __init__(self, name, definition, strict_type_checks=False):
+ """Initialize a histogram named name with the given definition.
+ definition is a dict-like object that must contain at least the keys:
+
+        - 'kind': The kind of histogram. Must be one of 'boolean', 'flag',
+          'count', 'enumerated', 'categorical', 'linear', or 'exponential'.
+        - 'description': A textual description of the histogram.
+
+        strict_type_checks is a boolean indicating whether to use the new,
+        stricter type checks. The server-side still has to deal with old,
+        oddly typed submissions, so we have to skip them there by default.
+ """
+ self._strict_type_checks = strict_type_checks
+ self._is_use_counter = name.startswith("USE_COUNTER2_")
+ if self._is_use_counter:
+ definition.setdefault("record_in_processes", ["main", "content"])
+ definition.setdefault("releaseChannelCollection", "opt-out")
+ definition.setdefault("products", ["firefox", "fennec"])
+ self.verify_attributes(name, definition)
+ self._name = name
+ self._description = definition["description"]
+ self._kind = definition["kind"]
+ self._keys = definition.get("keys", [])
+ self._keyed = definition.get("keyed", False)
+ self._expiration = definition.get("expires_in_version")
+ self._labels = definition.get("labels", [])
+ self._record_in_processes = definition.get("record_in_processes")
+ self._record_into_store = definition.get("record_into_store", ["main"])
+ self._products = definition.get("products")
+ self._operating_systems = definition.get("operating_systems", ["all"])
+
+ self.compute_bucket_parameters(definition)
+ self.set_nsITelemetry_kind()
+ self.set_dataset(definition)
+
+ def name(self):
+ """Return the name of the histogram."""
+ return self._name
+
+ def description(self):
+ """Return the description of the histogram."""
+ return self._description
+
+ def kind(self):
+ """Return the kind of the histogram.
+ Will be one of 'boolean', 'flag', 'count', 'enumerated', 'categorical', 'linear',
+ or 'exponential'."""
+ return self._kind
+
+ def expiration(self):
+ """Return the expiration version of the histogram."""
+ return self._expiration
+
+ def nsITelemetry_kind(self):
+ """Return the nsITelemetry constant corresponding to the kind of
+ the histogram."""
+ return self._nsITelemetry_kind
+
+ def low(self):
+ """Return the lower bound of the histogram."""
+ return self._low
+
+    def high(self):
+        """Return the upper bound of the histogram."""
+ return self._high
+
+ def n_buckets(self):
+ """Return the number of buckets in the histogram."""
+ return self._n_buckets
+
+    def keyed(self):
+        """Return True if this is a keyed histogram, False otherwise."""
+ return self._keyed
+
+    def keys(self):
+        """Return the list of allowed keys for a keyed histogram, [] for others."""
+ return self._keys
+
+    def dataset(self):
+        """Return the dataset this histogram belongs to."""
+ return self._dataset
+
+ def labels(self):
+ """Returns a list of labels for a categorical histogram, [] for others."""
+ return self._labels
+
+ def record_in_processes(self):
+ """Returns a list of processes this histogram is permitted to record in."""
+ return self._record_in_processes
+
+ def record_in_processes_enum(self):
+ """Get the non-empty list of flags representing the processes to record data in"""
+ return [utils.process_name_to_enum(p) for p in self.record_in_processes()]
+
+ def products(self):
+ """Get the non-empty list of products to record data on"""
+ return self._products
+
+ def products_enum(self):
+ """Get the non-empty list of flags representing products to record data on"""
+ return [utils.product_name_to_enum(p) for p in self.products()]
+
+ def operating_systems(self):
+ """Get the list of operating systems to record data on"""
+ return self._operating_systems
+
+ def record_on_os(self, target_os):
+ """Check if this probe should be recorded on the passed os."""
+ os = self.operating_systems()
+ if "all" in os:
+ return True
+
+ canonical_os = utils.canonical_os(target_os)
+
+ if "unix" in os and canonical_os in utils.UNIX_LIKE_OS:
+ return True
+
+ return canonical_os in os
+
+ def record_into_store(self):
+ """Get the non-empty list of stores to record into"""
+ return self._record_into_store
+
+ def ranges(self):
+ """Return an array of lower bounds for each bucket in the histogram."""
+ bucket_fns = {
+ "boolean": linear_buckets,
+ "flag": linear_buckets,
+ "count": linear_buckets,
+ "enumerated": linear_buckets,
+ "categorical": linear_buckets,
+ "linear": linear_buckets,
+ "exponential": exponential_buckets,
+ }
+
+ if self._kind not in bucket_fns:
+ ParserError(
+ 'Unknown kind "%s" for histogram "%s".' % (self._kind, self._name)
+ ).handle_later()
+
+ fn = bucket_fns[self._kind]
+ return fn(self.low(), self.high(), self.n_buckets())
+
+ def compute_bucket_parameters(self, definition):
+ bucket_fns = {
+ "boolean": Histogram.boolean_flag_bucket_parameters,
+ "flag": Histogram.boolean_flag_bucket_parameters,
+ "count": Histogram.boolean_flag_bucket_parameters,
+ "enumerated": Histogram.enumerated_bucket_parameters,
+ "categorical": Histogram.categorical_bucket_parameters,
+ "linear": Histogram.linear_bucket_parameters,
+ "exponential": Histogram.exponential_bucket_parameters,
+ }
+
+ if self._kind not in bucket_fns:
+ ParserError(
+ 'Unknown kind "%s" for histogram "%s".' % (self._kind, self._name)
+ ).handle_later()
+
+ fn = bucket_fns[self._kind]
+ self.set_bucket_parameters(*fn(definition))
+
+ def verify_attributes(self, name, definition):
+ general_keys = ALWAYS_ALLOWED_KEYS + ["low", "high", "n_buckets"]
+
+ table = {
+ "boolean": ALWAYS_ALLOWED_KEYS,
+ "flag": ALWAYS_ALLOWED_KEYS,
+ "count": ALWAYS_ALLOWED_KEYS,
+ "enumerated": ALWAYS_ALLOWED_KEYS + ["n_values"],
+ "categorical": ALWAYS_ALLOWED_KEYS + ["labels", "n_values"],
+ "linear": general_keys,
+ "exponential": general_keys,
+ }
+        # We removed extended_statistics_ok on the client, but the server-side,
+        # where _strict_type_checks==False, has to deal with historical data.
+        # Build a fresh list here: "linear" and "exponential" share the same
+        # general_keys object, so appending in place would affect both kinds.
+        if not self._strict_type_checks:
+            table["exponential"] = table["exponential"] + ["extended_statistics_ok"]
+
+ kind = definition["kind"]
+ if kind not in table:
+ ParserError(
+ 'Unknown kind "%s" for histogram "%s".' % (kind, name)
+ ).handle_later()
+ allowed_keys = table[kind]
+
+ self.check_name(name)
+ self.check_keys(name, definition, allowed_keys)
+ self.check_keys_field(name, definition)
+ self.check_field_types(name, definition)
+ self.check_allowlisted_kind(name, definition)
+ self.check_allowlistable_fields(name, definition)
+ self.check_expiration(name, definition)
+ self.check_label_values(name, definition)
+ self.check_record_in_processes(name, definition)
+ self.check_products(name, definition)
+ self.check_operating_systems(name, definition)
+ self.check_record_into_store(name, definition)
+
+ def check_name(self, name):
+ if "#" in name:
+ ParserError(
+ 'Error for histogram name "%s": "#" is not allowed.' % (name)
+ ).handle_later()
+
+ # Avoid C++ identifier conflicts between histogram enums and label enum names.
+ if name.startswith("LABELS_"):
+ ParserError(
+ 'Error for histogram name "%s": can not start with "LABELS_".' % (name)
+ ).handle_later()
+
+ # To make it easier to generate C++ identifiers from this etc., we restrict
+ # the histogram names to a strict pattern.
+ # We skip this on the server to avoid failures with old Histogram.json revisions.
+ if self._strict_type_checks:
+ if not re.match(CPP_IDENTIFIER_PATTERN, name, re.IGNORECASE):
+ ParserError(
+ 'Error for histogram name "%s": name does not conform to "%s"'
+ % (name, CPP_IDENTIFIER_PATTERN)
+ ).handle_later()
+
+ def check_expiration(self, name, definition):
+ field = "expires_in_version"
+ expiration = definition.get(field)
+
+ if not expiration:
+ return
+
+        # We forbid new probes from using the "expires_in_version": "default"
+        # field/value pair. Old ones that use it are added to the allowlist.
+ if (
+ expiration == "default"
+ and allowlists is not None
+ and name not in allowlists["expiry_default"]
+ ):
+ ParserError(
+ 'New histogram "%s" cannot have "default" %s value.' % (name, field)
+ ).handle_later()
+
+        # Historical editions of Histograms.json can have the deprecated
+        # expiration format 'N.Na1'. Fortunately, the scripts that parse those
+        # historical files set self._strict_type_checks to False.
+ if (
+ expiration != "default"
+ and not utils.validate_expiration_version(expiration)
+ and self._strict_type_checks
+ ):
+ ParserError(
+ (
+ "Error for histogram {} - invalid {}: {}."
+ "\nSee: {}#expires-in-version"
+ ).format(name, field, expiration, HISTOGRAMS_DOC_URL)
+ ).handle_later()
+
+ expiration = utils.add_expiration_postfix(expiration)
+
+ definition[field] = expiration
+
+ def check_label_values(self, name, definition):
+ labels = definition.get("labels")
+ if not labels:
+ return
+
+        # Materialize the filter so the values can be both counted and joined.
+        invalid = list(filter(lambda l: len(l) > MAX_LABEL_LENGTH, labels))
+        if len(invalid) > 0:
+            ParserError(
+                'Label values for "%s" exceed length limit of %d: %s'
+                % (name, MAX_LABEL_LENGTH, ", ".join(invalid))
+            ).handle_later()
+
+ if len(labels) > MAX_LABEL_COUNT:
+ ParserError(
+ 'Label count for "%s" exceeds limit of %d' % (name, MAX_LABEL_COUNT)
+ ).handle_now()
+
+ # To make it easier to generate C++ identifiers from this etc., we restrict
+ # the label values to a strict pattern.
+        invalid = list(
+            filter(lambda l: not re.match(CPP_IDENTIFIER_PATTERN, l, re.IGNORECASE), labels)
+        )
+        if len(invalid) > 0:
+            ParserError(
+                'Label values for %s do not match pattern "%s": %s'
+                % (name, CPP_IDENTIFIER_PATTERN, ", ".join(invalid))
+            ).handle_later()
+
+ def check_record_in_processes(self, name, definition):
+ if not self._strict_type_checks:
+ return
+
+ field = "record_in_processes"
+ rip = definition.get(field)
+
+ DOC_URL = HISTOGRAMS_DOC_URL + "#record-in-processes"
+
+ if not rip:
+ ParserError(
+ 'Histogram "%s" must have a "%s" field:\n%s' % (name, field, DOC_URL)
+ ).handle_later()
+
+ for process in rip:
+ if not utils.is_valid_process_name(process):
+ ParserError(
+ 'Histogram "%s" has unknown process "%s" in %s.\n%s'
+ % (name, process, field, DOC_URL)
+ ).handle_later()
+
+ def check_products(self, name, definition):
+ if not self._strict_type_checks:
+ return
+
+ field = "products"
+ products = definition.get(field)
+
+ DOC_URL = HISTOGRAMS_DOC_URL + "#products"
+
+ if not products:
+ ParserError(
+ 'Histogram "%s" must have a "%s" field:\n%s' % (name, field, DOC_URL)
+ ).handle_now()
+
+ for product in products:
+ if not utils.is_valid_product(product):
+ ParserError(
+ 'Histogram "%s" has unknown product "%s" in %s.\n%s'
+ % (name, product, field, DOC_URL)
+ ).handle_later()
+ if utils.is_geckoview_streaming_product(product):
+ kind = definition.get("kind")
+ if kind not in GECKOVIEW_STREAMING_SUPPORTED_KINDS:
+ ParserError(
+ (
+ 'Histogram "%s" is of kind "%s" which is unsupported for '
+ 'product "%s".'
+ )
+ % (name, kind, product)
+ ).handle_later()
+ keyed = definition.get("keyed")
+ if keyed:
+ ParserError(
+ 'Keyed histograms like "%s" are unsupported for product "%s"'
+ % (name, product)
+ ).handle_later()
+
+ def check_operating_systems(self, name, definition):
+ if not self._strict_type_checks:
+ return
+
+ field = "operating_systems"
+ operating_systems = definition.get(field)
+
+ DOC_URL = HISTOGRAMS_DOC_URL + "#operating-systems"
+
+ if not operating_systems:
+ # operating_systems is optional
+ return
+
+ for operating_system in operating_systems:
+ if not utils.is_valid_os(operating_system):
+ ParserError(
+ 'Histogram "%s" has unknown operating system "%s" in %s.\n%s'
+ % (name, operating_system, field, DOC_URL)
+ ).handle_later()
+
+ def check_record_into_store(self, name, definition):
+ if not self._strict_type_checks:
+ return
+
+ field = "record_into_store"
+ DOC_URL = HISTOGRAMS_DOC_URL + "#record-into-store"
+
+ if field not in definition:
+ # record_into_store is optional
+ return
+
+ record_into_store = definition.get(field)
+ # record_into_store should not be empty
+ if not record_into_store:
+ ParserError(
+ 'Histogram "%s" has empty list of stores, which is not allowed.\n%s'
+ % (name, DOC_URL)
+ ).handle_later()
+
+ def check_keys_field(self, name, definition):
+ keys = definition.get("keys")
+ if not self._strict_type_checks or keys is None:
+ return
+
+ if not definition.get("keyed", False):
+ raise ValueError(
+ "'keys' field is not valid for %s; only allowed for keyed histograms."
+ % (name)
+ )
+
+ if len(keys) == 0:
+ raise ValueError("The key list for %s cannot be empty" % (name))
+
+        if len(keys) > MAX_KEY_COUNT:
+            raise ValueError(
+                "Key count for %s exceeds limit of %d" % (name, MAX_KEY_COUNT)
+            )
+
+        # Materialize the filter so the values can be both counted and joined.
+        invalid = list(filter(lambda k: len(k) > MAX_KEY_LENGTH, keys))
+        if len(invalid) > 0:
+            raise ValueError(
+                '"keys" values for %s exceed length limit of %d: %s'
+                % (name, MAX_KEY_LENGTH, ", ".join(invalid))
+            )
+
+ def check_allowlisted_kind(self, name, definition):
+ # We don't need to run any of these checks on the server.
+ if not self._strict_type_checks or allowlists is None:
+ return
+
+ # Disallow "flag" and "count" histograms on desktop, suggest to use
+ # scalars instead. Allow using these histograms on Android, as we
+ # don't support scalars there yet.
+ hist_kind = definition.get("kind")
+ android_target = "android" in definition.get("operating_systems", [])
+
+ if (
+ not android_target
+ and hist_kind in ["flag", "count"]
+ and name not in allowlists["kind"]
+ ):
+ ParserError(
+ (
+ 'Unsupported kind "%s" for histogram "%s":\n'
+ 'New "%s" histograms are not supported on Desktop, you should'
+ " use scalars instead:\n"
+ "%s\n"
+ "Are you trying to add a histogram on Android?"
+ ' Add "operating_systems": ["android"] to your histogram definition.'
+ )
+ % (hist_kind, name, hist_kind, SCALARS_DOC_URL)
+ ).handle_now()
+
+ # Check for the presence of fields that old histograms are allowlisted for.
+ def check_allowlistable_fields(self, name, definition):
+ # Use counters don't have any mechanism to add the fields checked here,
+ # so skip the check for them.
+ # We also don't need to run any of these checks on the server.
+ if self._is_use_counter or not self._strict_type_checks:
+ return
+
+ # In the pipeline we don't have allowlists available.
+ if allowlists is None:
+ return
+
+ for field in ["alert_emails", "bug_numbers"]:
+ if field not in definition and name not in allowlists[field]:
+ ParserError(
+ 'New histogram "%s" must have a "%s" field.' % (name, field)
+ ).handle_later()
+ if field in definition and name in allowlists[field]:
+ msg = (
+ 'Histogram "%s" should be removed from the allowlist for "%s" in '
+ "histogram-allowlists.json."
+ )
+ ParserError(msg % (name, field)).handle_later()
+
+ def check_field_types(self, name, definition):
+ # Define expected types for the histogram properties.
+ type_checked_fields = {
+ "n_buckets": int,
+ "n_values": int,
+ "low": int,
+ "high": int,
+ "keyed": bool,
+ "expires_in_version": str,
+ "kind": str,
+ "description": str,
+ "releaseChannelCollection": str,
+ }
+
+ # For list fields we check the items types.
+ type_checked_list_fields = {
+ "bug_numbers": int,
+ "alert_emails": str,
+ "labels": str,
+ "record_in_processes": str,
+ "keys": str,
+ "products": str,
+ "operating_systems": str,
+ "record_into_store": str,
+ }
+
+ # For the server-side, where _strict_type_checks==False, we want to
+ # skip the stricter type checks for these fields for dealing with
+ # historical data.
+ coerce_fields = ["low", "high", "n_values", "n_buckets"]
+ if not self._strict_type_checks:
+ # This handles some old non-numeric expressions.
+ EXPRESSIONS = {
+ "JS::GCReason::NUM_TELEMETRY_REASONS": 101,
+ "mozilla::StartupTimeline::MAX_EVENT_ID": 12,
+ }
+
+ def try_to_coerce_to_number(v):
+ if v in EXPRESSIONS:
+ return EXPRESSIONS[v]
+ try:
+ return eval(v, {})
+ except Exception:
+ return v
+
+ for key in [k for k in coerce_fields if k in definition]:
+ definition[key] = try_to_coerce_to_number(definition[key])
+ # This handles old "keyed":"true" definitions (bug 1271986).
+ if definition.get("keyed", None) == "true":
+ definition["keyed"] = True
+
+ def nice_type_name(t):
+ if t is str:
+ return "string"
+ return t.__name__
+
+ for key, key_type in type_checked_fields.items():
+ if key not in definition:
+ continue
+ if not isinstance(definition[key], key_type):
+ ParserError(
+ 'Value for key "{0}" in histogram "{1}" should be {2}.'.format(
+ key, name, nice_type_name(key_type)
+ )
+ ).handle_later()
+
+        # Make sure the max range is less than or equal to INT_MAX.
+        if "high" in definition and not c_int(definition["high"]).value > 0:
+            ParserError(
+                'Value for high in histogram "{0}" should be less than or equal to INT_MAX.'.format(
+                    name
+                )
+            ).handle_later()
+
+ for key, key_type in type_checked_list_fields.items():
+ if key not in definition:
+ continue
+ if not all(isinstance(x, key_type) for x in definition[key]):
+ ParserError(
+ 'All values for list "{0}" in histogram "{1}" should be of type'
+ " {2}.".format(key, name, nice_type_name(key_type))
+ ).handle_later()
+
+ def check_keys(self, name, definition, allowed_keys):
+ if not self._strict_type_checks:
+ return
+ for key in iter(definition.keys()):
+ if key not in allowed_keys:
+ ParserError(
+ 'Key "%s" is not allowed for histogram "%s".' % (key, name)
+ ).handle_later()
+
+ def set_bucket_parameters(self, low, high, n_buckets):
+ self._low = low
+ self._high = high
+ self._n_buckets = n_buckets
+ max_n_buckets = 101 if self._kind in ["enumerated", "categorical"] else 100
+ if (
+ allowlists is not None
+ and self._n_buckets > max_n_buckets
+ and type(self._n_buckets) is int
+ ):
+ if self._name not in allowlists["n_buckets"]:
+ ParserError(
+ 'New histogram "%s" is not permitted to have more than 100 buckets.\n'
+ "Histograms with large numbers of buckets use disproportionately high"
+ " amounts of resources. Contact a Telemetry peer (e.g. in #telemetry)"
+ " if you think an exception ought to be made:\n"
+ "https://wiki.mozilla.org/Modules/Toolkit#Telemetry" % self._name
+ ).handle_later()
+
+ @staticmethod
+ def boolean_flag_bucket_parameters(definition):
+ return (1, 2, 3)
+
+ @staticmethod
+ def linear_bucket_parameters(definition):
+ return (definition.get("low", 1), definition["high"], definition["n_buckets"])
+
+ @staticmethod
+ def enumerated_bucket_parameters(definition):
+ n_values = definition["n_values"]
+ return (1, n_values, n_values + 1)
+
+ @staticmethod
+ def categorical_bucket_parameters(definition):
+ # Categorical histograms default to 50 buckets to make working with them easier.
+ # Otherwise when adding labels later we run into problems with the pipeline not
+ # supporting bucket changes.
+ # This can be overridden using the n_values field.
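+        # For instance (illustrative): three labels and no explicit n_values
+        # give max(3, 0, 50) = 50 values, i.e. bucket parameters (1, 50, 51).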
+ n_values = max(
+ len(definition["labels"]),
+ definition.get("n_values", 0),
+ MIN_CATEGORICAL_BUCKET_COUNT,
+ )
+ return (1, n_values, n_values + 1)
+
+ @staticmethod
+ def exponential_bucket_parameters(definition):
+ return (definition.get("low", 1), definition["high"], definition["n_buckets"])
+
+ def set_nsITelemetry_kind(self):
+ # Pick a Telemetry implementation type.
+ types = {
+ "boolean": "BOOLEAN",
+ "flag": "FLAG",
+ "count": "COUNT",
+ "enumerated": "LINEAR",
+ "categorical": "CATEGORICAL",
+ "linear": "LINEAR",
+ "exponential": "EXPONENTIAL",
+ }
+
+ if self._kind not in types:
+ ParserError(
+ 'Unknown kind "%s" for histogram "%s".' % (self._kind, self._name)
+ ).handle_later()
+
+ self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_%s" % types[self._kind]
+
+ def set_dataset(self, definition):
+ datasets = {
+ "opt-in": "DATASET_PRERELEASE_CHANNELS",
+ "opt-out": "DATASET_ALL_CHANNELS",
+ }
+
+ value = definition.get("releaseChannelCollection", "opt-in")
+ if value not in datasets:
+ ParserError(
+ "Unknown value for releaseChannelCollection"
+ ' policy for histogram "%s".' % self._name
+ ).handle_later()
+
+ self._dataset = "nsITelemetry::" + datasets[value]
+
+
+# This hook function loads the histograms into an OrderedDict.
+# It will raise a ParserError if duplicate keys are found.
+def load_histograms_into_dict(ordered_pairs, strict_type_checks):
+ d = collections.OrderedDict()
+ for key, value in ordered_pairs:
+ if strict_type_checks and key in d:
+ ParserError(
+ "Found duplicate key in Histograms file: %s" % key
+ ).handle_later()
+ d[key] = value
+ return d
+
+
+# We support generating histograms from multiple different input files, not
+# just Histograms.json. For each file's basename, we have a specific
+# routine to parse that file, and return a dictionary mapping histogram
+# names to histogram parameters.
+def from_json(filename, strict_type_checks):
+ with open(filename, "r") as f:
+ try:
+
+ def hook(ps):
+ return load_histograms_into_dict(ps, strict_type_checks)
+
+ histograms = json.load(f, object_pairs_hook=hook)
+ except ValueError as e:
+ ParserError(
+ "error parsing histograms in %s: %s" % (filename, e)
+ ).handle_now()
+ return histograms
+
+
+def from_UseCounters_conf(filename, strict_type_checks):
+ return usecounters.generate_histograms(filename)
+
+
+def from_UseCountersWorker_conf(filename, strict_type_checks):
+ return usecounters.generate_histograms(filename, True)
+
+
+def from_nsDeprecatedOperationList(filename, strict_type_checks):
+ operation_regex = re.compile("^DEPRECATED_OPERATION\\(([^)]+)\\)")
+ histograms = collections.OrderedDict()
+
+ with open(filename, "r") as f:
+ for line in f:
+ match = operation_regex.search(line)
+ if not match:
+ continue
+
+ op = match.group(1)
+
+ def add_counter(context):
+ name = "USE_COUNTER2_DEPRECATED_%s_%s" % (op, context.upper())
+ histograms[name] = {
+ "expires_in_version": "never",
+ "kind": "boolean",
+ "description": "Whether a %s used %s" % (context, op),
+ }
+
+ add_counter("document")
+ add_counter("page")
+
+ return histograms
+
+
+def to_camel_case(property_name):
+ return re.sub(
+ "(^|_|-)([a-z0-9])",
+ lambda m: m.group(2).upper(),
+ property_name.strip("_").strip("-"),
+ )
+
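+# Hand-traced example: to_camel_case("-moz-appearance") -> "MozAppearance"
+# (the leading "-" is stripped, then each "_" or "-" capitalizes what follows).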
+
+def add_css_property_counters(histograms, property_name):
+ def add_counter(context):
+ name = "USE_COUNTER2_CSS_PROPERTY_%s_%s" % (
+ to_camel_case(property_name),
+ context.upper(),
+ )
+ histograms[name] = {
+ "expires_in_version": "never",
+ "kind": "boolean",
+ "description": "Whether a %s used the CSS property %s"
+ % (context, property_name),
+ }
+
+ add_counter("document")
+ add_counter("page")
+
+
+def from_ServoCSSPropList(filename, strict_type_checks):
+ histograms = collections.OrderedDict()
+ properties = runpy.run_path(filename)["data"]
+ for prop in properties:
+ add_css_property_counters(histograms, prop.name)
+ return histograms
+
+
+def from_counted_unknown_properties(filename, strict_type_checks):
+ histograms = collections.OrderedDict()
+ properties = runpy.run_path(filename)["COUNTED_UNKNOWN_PROPERTIES"]
+
+ # NOTE(emilio): Unlike ServoCSSProperties, `prop` here is just the property
+ # name.
+ #
+ # We use the same naming as CSS properties so that we don't get
+ # discontinuity when we implement or prototype them.
+ for prop in properties:
+ add_css_property_counters(histograms, prop)
+ return histograms
+
+
+# This is only used for probe-scraper.
+def from_properties_db(filename, strict_type_checks):
+ histograms = collections.OrderedDict()
+ with open(filename, "r") as f:
+ in_css_properties = False
+
+ for line in f:
+ if not in_css_properties:
+ if line.startswith("exports.CSS_PROPERTIES = {"):
+ in_css_properties = True
+ continue
+
+ if line.startswith("};"):
+ break
+
+ if not line.startswith(' "'):
+ continue
+
+ name = line.split('"')[1]
+ add_css_property_counters(histograms, name)
+ return histograms
+
+
+FILENAME_PARSERS = [
+ (lambda x: from_json if x.endswith(".json") else None),
+ (
+ lambda x: from_nsDeprecatedOperationList
+ if x == "nsDeprecatedOperationList.h"
+ else None
+ ),
+ (lambda x: from_ServoCSSPropList if x == "ServoCSSPropList.py" else None),
+ (
+ lambda x: from_counted_unknown_properties
+ if x == "counted_unknown_properties.py"
+ else None
+ ),
+ (lambda x: from_properties_db if x == "properties-db.js" else None),
+]
+
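+# from_files (below) picks the first callable here that returns a non-None
+# parser for a file's basename; e.g. "Histograms.json" matches the first
+# entry and is parsed by from_json.
+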
+# Similarly to the dance above with buildconfig, usecounters may not be
+# available, so handle that gracefully.
+try:
+ import usecounters
+
+ FILENAME_PARSERS.append(
+ lambda x: from_UseCounters_conf if x == "UseCounters.conf" else None
+ )
+ FILENAME_PARSERS.append(
+ lambda x: from_UseCountersWorker_conf if x == "UseCountersWorker.conf" else None
+ )
+except ImportError:
+ pass
+
+
+def from_files(filenames, strict_type_checks=True):
+ """Return an iterator that provides a sequence of Histograms for
+ the histograms defined in filenames.
+ """
+ if strict_type_checks:
+ load_allowlist()
+
+ all_histograms = OrderedDict()
+ for filename in filenames:
+ parser = None
+ for checkFn in FILENAME_PARSERS:
+ parser = checkFn(os.path.basename(filename))
+ if parser is not None:
+ break
+
+ if parser is None:
+ ParserError("Don't know how to parse %s." % filename).handle_now()
+
+ histograms = parser(filename, strict_type_checks)
+
+ # OrderedDicts are important, because then the iteration order over
+ # the parsed histograms is stable, which makes the insertion into
+ # all_histograms stable, which makes ordering in generated files
+ # stable, which makes builds more deterministic.
+ if not isinstance(histograms, OrderedDict):
+ ParserError("Histogram parser did not provide an OrderedDict.").handle_now()
+
+ for (name, definition) in histograms.items():
+ if name in all_histograms:
+ ParserError('Duplicate histogram name "%s".' % name).handle_later()
+ all_histograms[name] = definition
+
+ def check_continuity(iterable, filter_function, name):
+ indices = list(filter(filter_function, enumerate(iter(iterable.keys()))))
+ if indices:
+ lower_bound = indices[0][0]
+ upper_bound = indices[-1][0]
+ n_counters = upper_bound - lower_bound + 1
+ if n_counters != len(indices):
+ ParserError(
+ "Histograms %s must be defined in a contiguous block." % name
+ ).handle_later()
+
+ # We require that all USE_COUNTER2_*_WORKER histograms be defined in a contiguous
+ # block.
+ check_continuity(
+ all_histograms,
+ lambda x: x[1].startswith("USE_COUNTER2_") and x[1].endswith("_WORKER"),
+ "use counter worker",
+ )
+ # And all other USE_COUNTER2_* histograms be defined in a contiguous
+ # block.
+ check_continuity(
+ all_histograms,
+ lambda x: x[1].startswith("USE_COUNTER2_") and not x[1].endswith("_WORKER"),
+ "use counter",
+ )
+
+ # Check that histograms that were removed from Histograms.json etc.
+ # are also removed from the allowlists.
+ if allowlists is not None:
+ all_allowlist_entries = itertools.chain.from_iterable(iter(allowlists.values()))
+ orphaned = set(all_allowlist_entries) - set(all_histograms.keys())
+ if len(orphaned) > 0:
+ msg = (
+ "The following entries are orphaned and should be removed from "
+ "histogram-allowlists.json:\n%s"
+ )
+ ParserError(msg % (", ".join(sorted(orphaned)))).handle_later()
+
+ for (name, definition) in all_histograms.items():
+ yield Histogram(name, definition, strict_type_checks=strict_type_checks)
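+
+
+# Minimal usage sketch (illustrative, not part of the original file):
+#
+#   for h in from_files(["Histograms.json"]):
+#       print(h.name(), h.nsITelemetry_kind(), h.n_buckets())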
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/parse_scalars.py b/toolkit/components/telemetry/build_scripts/mozparsers/parse_scalars.py
new file mode 100644
index 0000000000..5ec591b393
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/parse_scalars.py
@@ -0,0 +1,503 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import atexit
+import io
+import re
+
+import yaml
+
+from . import shared_telemetry_utils as utils
+from .shared_telemetry_utils import ParserError
+
+atexit.register(ParserError.exit_func)
+
+# The map containing the allowed scalar types and their mapping to
+# nsITelemetry::SCALAR_TYPE_* type constants.
+
+BASE_DOC_URL = (
+ "https://firefox-source-docs.mozilla.org/toolkit/components/"
+ + "telemetry/telemetry/collection/scalars.html"
+)
+
+SCALAR_TYPES_MAP = {
+ "uint": "nsITelemetry::SCALAR_TYPE_COUNT",
+ "string": "nsITelemetry::SCALAR_TYPE_STRING",
+ "boolean": "nsITelemetry::SCALAR_TYPE_BOOLEAN",
+}
+
+
+class ScalarType:
+ """A class for representing a scalar definition."""
+
+ def __init__(self, category_name, probe_name, definition, strict_type_checks):
+ # Validate and set the name, so we don't need to pass it to the other
+ # validation functions.
+ self._strict_type_checks = strict_type_checks
+ self.validate_names(category_name, probe_name)
+ self._name = probe_name
+ self._category_name = category_name
+
+ # Validating the scalar definition.
+ self.validate_types(definition)
+ self.validate_values(definition)
+
+ # Everything is ok, set the rest of the data.
+ self._definition = definition
+ self._expires = utils.add_expiration_postfix(definition["expires"])
+
+ def validate_names(self, category_name, probe_name):
+ """Validate the category and probe name:
+ - Category name must be alpha-numeric + '.', no leading/trailing digit or '.'.
+ - Probe name must be alpha-numeric + '_', no leading/trailing digit or '_'.
+
+ :param category_name: the name of the category the probe is in.
+ :param probe_name: the name of the scalar probe.
+ :raises ParserError: if the length of the names exceeds the limit or they don't
+          conform to our name specification.
+ """
+
+ # Enforce a maximum length on category and probe names.
+ MAX_NAME_LENGTH = 40
+ for n in [category_name, probe_name]:
+ if len(n) > MAX_NAME_LENGTH:
+ ParserError(
+ (
+ "Name '{}' exceeds maximum name length of {} characters.\n"
+ "See: {}#the-yaml-definition-file"
+ ).format(n, MAX_NAME_LENGTH, BASE_DOC_URL)
+ ).handle_later()
+
+ def check_name(name, error_msg_prefix, allowed_char_regexp):
+ # Check if we only have the allowed characters.
+ chars_regxp = r"^[a-zA-Z0-9" + allowed_char_regexp + r"]+$"
+ if not re.search(chars_regxp, name):
+ ParserError(
+ (
+ error_msg_prefix + " name must be alpha-numeric. Got: '{}'.\n"
+ "See: {}#the-yaml-definition-file"
+ ).format(name, BASE_DOC_URL)
+ ).handle_later()
+
+ # Don't allow leading/trailing digits, '.' or '_'.
+ if re.search(r"(^[\d\._])|([\d\._])$", name):
+ ParserError(
+ (
+ error_msg_prefix + " name must not have a leading/trailing "
+ "digit, a dot or underscore. Got: '{}'.\n"
+ " See: {}#the-yaml-definition-file"
+ ).format(name, BASE_DOC_URL)
+ ).handle_later()
+
+ check_name(category_name, "Category", r"\.")
+ check_name(probe_name, "Probe", r"_")
+
+ def validate_types(self, definition):
+ """This function performs some basic sanity checks on the scalar definition:
+ - Checks that all the required fields are available.
+ - Checks that all the fields have the expected types.
+
+ :param definition: the dictionary containing the scalar properties.
+ :raises ParserError: if a scalar definition field is of the wrong type.
+ :raises ParserError: if a required field is missing or unknown fields are present.
+ """
+
+ if not self._strict_type_checks:
+ return
+
+ def validate_notification_email(notification_email):
+ # Perform simple email validation to make sure it doesn't contain spaces or commas.
+ return not any(c in notification_email for c in [",", " "])
+
+ # The required and optional fields in a scalar type definition.
+ REQUIRED_FIELDS = {
+ "bug_numbers": list, # This contains ints. See LIST_FIELDS_CONTENT.
+ "description": str,
+ "expires": str,
+ "kind": str,
+ "notification_emails": list, # This contains strings. See LIST_FIELDS_CONTENT.
+ "record_in_processes": list,
+ "products": list,
+ }
+
+ OPTIONAL_FIELDS = {
+ "release_channel_collection": str,
+ "keyed": bool,
+ "keys": list,
+ "operating_systems": list,
+ "record_into_store": list,
+ }
+
+ # The types for the data within the fields that hold lists.
+ LIST_FIELDS_CONTENT = {
+ "bug_numbers": int,
+ "notification_emails": str,
+ "record_in_processes": str,
+ "products": str,
+ "keys": str,
+ "operating_systems": str,
+ "record_into_store": str,
+ }
+
+ # Concatenate the required and optional field definitions.
+ ALL_FIELDS = REQUIRED_FIELDS.copy()
+ ALL_FIELDS.update(OPTIONAL_FIELDS)
+
+ # Checks that all the required fields are available.
+ missing_fields = [f for f in REQUIRED_FIELDS.keys() if f not in definition]
+ if len(missing_fields) > 0:
+ ParserError(
+ self._name
+ + " - missing required fields: "
+ + ", ".join(missing_fields)
+ + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+ ).handle_later()
+
+ # Do we have any unknown field?
+ unknown_fields = [f for f in definition.keys() if f not in ALL_FIELDS]
+ if len(unknown_fields) > 0:
+ ParserError(
+ self._name
+ + " - unknown fields: "
+ + ", ".join(unknown_fields)
+ + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+ ).handle_later()
+
+ # Checks the type for all the fields.
+ wrong_type_names = [
+ "{} must be {}".format(f, str(ALL_FIELDS[f]))
+ for f in definition.keys()
+ if not isinstance(definition[f], ALL_FIELDS[f])
+ ]
+ if len(wrong_type_names) > 0:
+ ParserError(
+ self._name
+ + " - "
+ + ", ".join(wrong_type_names)
+ + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+ ).handle_later()
+
+        # Check that the email addresses don't contain spaces or commas
+ notification_emails = definition.get("notification_emails")
+ for notification_email in notification_emails:
+ if not validate_notification_email(notification_email):
+ ParserError(
+ self._name
+ + " - invalid email address: "
+ + notification_email
+ + ".\nSee: {}".format(BASE_DOC_URL)
+ ).handle_later()
+
+ # Check that the lists are not empty and that data in the lists
+ # have the correct types.
+ list_fields = [f for f in definition if isinstance(definition[f], list)]
+ for field in list_fields:
+ # Check for empty lists.
+ if len(definition[field]) == 0:
+ ParserError(
+ (
+ "Field '{}' for probe '{}' must not be empty"
+                        + ".\nSee: {}#required-fields"
+ ).format(field, self._name, BASE_DOC_URL)
+ ).handle_later()
+ # Check the type of the list content.
+ broken_types = [
+ not isinstance(v, LIST_FIELDS_CONTENT[field]) for v in definition[field]
+ ]
+ if any(broken_types):
+ ParserError(
+ (
+ "Field '{}' for probe '{}' must only contain values of type {}"
+                        ".\nSee: {}#the-yaml-definition-file"
+ ).format(
+ field,
+ self._name,
+ str(LIST_FIELDS_CONTENT[field]),
+ BASE_DOC_URL,
+ )
+ ).handle_later()
+
+ # Check that keys are only added to keyed scalars and that their values are valid
+ MAX_KEY_COUNT = 100
+ MAX_KEY_LENGTH = 72
+ keys = definition.get("keys")
+ if keys is not None:
+ if not definition.get("keyed", False):
+ ParserError(
+ self._name
+ + "- invalid field: "
+                    + "\n`keys` field only valid for keyed scalars"
+ ).handle_later()
+
+ if len(keys) > MAX_KEY_COUNT:
+ ParserError(
+ self._name
+ + " - exceeding key count: "
+ + "\n`keys` values count must not exceed {}".format(MAX_KEY_COUNT)
+ ).handle_later()
+
+            invalid = list(filter(lambda k: len(k) > MAX_KEY_LENGTH, keys))
+            if len(invalid) > 0:
+                ParserError(
+                    self._name
+                    + " - invalid key value"
+                    + "\n `keys` values exceed length limit {}: ".format(MAX_KEY_LENGTH)
+                    + ", ".join(invalid)
+                ).handle_later()
+
+ def validate_values(self, definition):
+ """This function checks that the fields have the correct values.
+
+ :param definition: the dictionary containing the scalar properties.
+ :raises ParserError: if a scalar definition field contains an unexpected value.
+ """
+
+ if not self._strict_type_checks:
+ return
+
+ # Validate the scalar kind.
+ scalar_kind = definition.get("kind")
+ if scalar_kind not in SCALAR_TYPES_MAP.keys():
+ ParserError(
+ self._name
+ + " - unknown scalar kind: "
+ + scalar_kind
+ + ".\nSee: {}".format(BASE_DOC_URL)
+ ).handle_later()
+
+ # Validate the collection policy.
+ collection_policy = definition.get("release_channel_collection", None)
+ if collection_policy and collection_policy not in ["opt-in", "opt-out"]:
+ ParserError(
+ self._name
+ + " - unknown collection policy: "
+ + collection_policy
+ + ".\nSee: {}#optional-fields".format(BASE_DOC_URL)
+ ).handle_later()
+
+ # Validate operating_systems.
+ operating_systems = definition.get("operating_systems", [])
+ for operating_system in operating_systems:
+ if not utils.is_valid_os(operating_system):
+ ParserError(
+ self._name
+ + " - invalid entry in operating_systems: "
+ + operating_system
+ + ".\nSee: {}#optional-fields".format(BASE_DOC_URL)
+ ).handle_later()
+
+ # Validate record_in_processes.
+ record_in_processes = definition.get("record_in_processes", [])
+ for proc in record_in_processes:
+ if not utils.is_valid_process_name(proc):
+ ParserError(
+ self._name
+ + " - unknown value in record_in_processes: "
+ + proc
+ + ".\nSee: {}".format(BASE_DOC_URL)
+ ).handle_later()
+
+ # Validate product.
+ products = definition.get("products", [])
+ for product in products:
+ if not utils.is_valid_product(product):
+ ParserError(
+ self._name
+ + " - unknown value in products: "
+ + product
+ + ".\nSee: {}".format(BASE_DOC_URL)
+ ).handle_later()
+ if utils.is_geckoview_streaming_product(product):
+ keyed = definition.get("keyed")
+ if keyed:
+ ParserError(
+ "%s - keyed Scalars not supported for product %s"
+ % (self._name, product)
+ ).handle_later()
+
+ # Validate the expiration version.
+        # Historical versions of Scalars.json may contain expiration versions
+        # using the deprecated format 'N.Na1'. The scripts that parse those
+        # historical files set self._strict_type_checks to False.
+ expires = definition.get("expires")
+ if not utils.validate_expiration_version(expires) and self._strict_type_checks:
+ ParserError(
+ "{} - invalid expires: {}.\nSee: {}#required-fields".format(
+ self._name, expires, BASE_DOC_URL
+ )
+ ).handle_later()
+
+ @property
+ def category(self):
+ """Get the category name"""
+ return self._category_name
+
+ @property
+ def name(self):
+ """Get the scalar name"""
+ return self._name
+
+ @property
+ def label(self):
+ """Get the scalar label generated from the scalar and category names."""
+ return self._category_name + "." + self._name
+
+ @property
+ def enum_label(self):
+ """Get the enum label generated from the scalar and category names. This is used to
+ generate the enum tables."""
+
+        # The scalar name can contain information about its hierarchy (e.g. 'a.b.scalar').
+ # We can't have dots in C++ enums, replace them with an underscore. Also, make the
+ # label upper case for consistency with the histogram enums.
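+        # For example, a scalar labeled "a.b.scalar" becomes "A_B_SCALAR".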
+ return self.label.replace(".", "_").upper()
+
+ @property
+ def bug_numbers(self):
+ """Get the list of related bug numbers"""
+ return self._definition["bug_numbers"]
+
+ @property
+ def description(self):
+ """Get the scalar description"""
+ return self._definition["description"]
+
+ @property
+ def expires(self):
+ """Get the scalar expiration"""
+ return self._expires
+
+ @property
+ def kind(self):
+ """Get the scalar kind"""
+ return self._definition["kind"]
+
+ @property
+    def keys(self):
+        """Get the allowed keys for this scalar, or [] if there aren't any."""
+ return self._definition.get("keys", [])
+
+ @property
+ def keyed(self):
+ """Boolean indicating whether this is a keyed scalar"""
+ return self._definition.get("keyed", False)
+
+ @property
+ def nsITelemetry_kind(self):
+ """Get the scalar kind constant defined in nsITelemetry"""
+ return SCALAR_TYPES_MAP.get(self.kind)
+
+ @property
+ def notification_emails(self):
+ """Get the list of notification emails"""
+ return self._definition["notification_emails"]
+
+ @property
+ def record_in_processes(self):
+ """Get the non-empty list of processes to record data in"""
+ # Before we added content process support in bug 1278556, we only recorded in the
+ # main process.
+ return self._definition.get("record_in_processes", ["main"])
+
+ @property
+ def record_in_processes_enum(self):
+ """Get the non-empty list of flags representing the processes to record data in"""
+ return [utils.process_name_to_enum(p) for p in self.record_in_processes]
+
+ @property
+ def products(self):
+ """Get the non-empty list of products to record data on"""
+ return self._definition.get("products")
+
+ @property
+ def products_enum(self):
+ """Get the non-empty list of flags representing products to record data on"""
+ return [utils.product_name_to_enum(p) for p in self.products]
+
+ @property
+ def dataset(self):
+ """Get the nsITelemetry constant equivalent to the chosen release channel collection
+ policy for the scalar.
+ """
+ rcc = self.dataset_short
+ table = {
+ "opt-in": "DATASET_PRERELEASE_CHANNELS",
+ "opt-out": "DATASET_ALL_CHANNELS",
+ }
+ return "nsITelemetry::" + table[rcc]
+
+ @property
+ def dataset_short(self):
+ """Get the short name of the chosen release channel collection policy for the scalar."""
+ # The collection policy is optional, but we still define a default
+ # behaviour for it.
+ return self._definition.get("release_channel_collection", "opt-in")
+
+ @property
+ def operating_systems(self):
+ """Get the list of operating systems to record data on"""
+ return self._definition.get("operating_systems", ["all"])
+
+ def record_on_os(self, target_os):
+ """Check if this probe should be recorded on the passed os."""
+ os = self.operating_systems
+ if "all" in os:
+ return True
+
+ canonical_os = utils.canonical_os(target_os)
+
+ if "unix" in os and canonical_os in utils.UNIX_LIKE_OS:
+ return True
+
+ return canonical_os in os
+
+ @property
+ def record_into_store(self):
+ """Get the list of stores this probe should be recorded into"""
+ return self._definition.get("record_into_store", ["main"])
+
+
+def load_scalars(filename, strict_type_checks=True):
+    """Parses a YAML file containing the scalar definitions.
+
+    :param filename: the YAML file containing the scalar definitions.
+    :param strict_type_checks: a boolean indicating whether to use the stricter type checks.
+    :raises ParserError: if the scalar file cannot be opened or parsed.
+    """
+
+ # Parse the scalar definitions from the YAML file.
+ scalars = None
+ try:
+ with io.open(filename, "r", encoding="utf-8") as f:
+ scalars = yaml.safe_load(f)
+ except IOError as e:
+ ParserError("Error opening " + filename + ": " + str(e)).handle_now()
+    except yaml.YAMLError as e:
+ ParserError(
+ "Error parsing scalars in {}: {}"
+ ".\nSee: {}".format(filename, e, BASE_DOC_URL)
+ ).handle_now()
+
+ scalar_list = []
+
+ # Scalars are defined in a fixed two-level hierarchy within the definition file.
+ # The first level contains the category name, while the second level contains the
+ # probe name (e.g. "category.name: probe: ...").
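+    #
+    # For illustration only (all values hypothetical), a minimal entry
+    # consistent with the REQUIRED_FIELDS checked in ScalarType might be:
+    #
+    #   browser.engagement:
+    #     tab_open_count:
+    #       bug_numbers: [1234567]
+    #       description: Number of tabs opened in a subsession.
+    #       expires: never
+    #       kind: uint
+    #       notification_emails: ["telemetry@example.com"]
+    #       products: ["firefox"]
+    #       record_in_processes: ["main"]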
+ for category_name in sorted(scalars):
+ category = scalars[category_name]
+
+ # Make sure that the category has at least one probe in it.
+ if not category or len(category) == 0:
+ ParserError(
+ 'Category "{}" must have at least one probe in it'
+ ".\nSee: {}".format(category_name, BASE_DOC_URL)
+ ).handle_later()
+
+ for probe_name in sorted(category):
+ # We found a scalar type. Go ahead and parse it.
+ scalar_info = category[probe_name]
+ scalar_list.append(
+ ScalarType(category_name, probe_name, scalar_info, strict_type_checks)
+ )
+
+ return scalar_list
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/parse_user_interactions.py b/toolkit/components/telemetry/build_scripts/mozparsers/parse_user_interactions.py
new file mode 100644
index 0000000000..6863d67ec4
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/parse_user_interactions.py
@@ -0,0 +1,256 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import atexit
+import io
+import re
+
+import yaml
+
+from .shared_telemetry_utils import ParserError
+
+atexit.register(ParserError.exit_func)
+
+BASE_DOC_URL = (
+ "https://firefox-source-docs.mozilla.org/toolkit/components/"
+ + "telemetry/telemetry/collection/user_interactions.html"
+)
+
+
+class UserInteractionType:
+ """A class for representing a UserInteraction definition."""
+
+ def __init__(self, category_name, user_interaction_name, definition):
+ # Validate and set the name, so we don't need to pass it to the other
+ # validation functions.
+ self.validate_names(category_name, user_interaction_name)
+ self._name = user_interaction_name
+ self._category_name = category_name
+
+ # Validating the UserInteraction definition.
+ self.validate_types(definition)
+
+ # Everything is ok, set the rest of the data.
+ self._definition = definition
+
+ def validate_names(self, category_name, user_interaction_name):
+ """Validate the category and UserInteraction name:
+ - Category name must be alpha-numeric + '.', no leading/trailing digit or '.'.
+ - UserInteraction name must be alpha-numeric + '_', no leading/trailing digit or '_'.
+
+ :param category_name: the name of the category the UserInteraction is in.
+ :param user_interaction_name: the name of the UserInteraction.
+ :raises ParserError: if the length of the names exceeds the limit or they don't
+          conform to our name specification.
+ """
+
+ # Enforce a maximum length on category and UserInteraction names.
+ MAX_NAME_LENGTH = 40
+ for n in [category_name, user_interaction_name]:
+ if len(n) > MAX_NAME_LENGTH:
+ ParserError(
+ (
+ "Name '{}' exceeds maximum name length of {} characters.\n"
+ "See: {}#the-yaml-definition-file"
+ ).format(n, MAX_NAME_LENGTH, BASE_DOC_URL)
+ ).handle_later()
+
+ def check_name(name, error_msg_prefix, allowed_char_regexp):
+ # Check if we only have the allowed characters.
+ chars_regxp = r"^[a-zA-Z0-9" + allowed_char_regexp + r"]+$"
+ if not re.search(chars_regxp, name):
+ ParserError(
+ (
+ error_msg_prefix + " name must be alpha-numeric. Got: '{}'.\n"
+ "See: {}#the-yaml-definition-file"
+ ).format(name, BASE_DOC_URL)
+ ).handle_later()
+
+ # Don't allow leading/trailing digits, '.' or '_'.
+ if re.search(r"(^[\d\._])|([\d\._])$", name):
+ ParserError(
+ (
+ error_msg_prefix + " name must not have a leading/trailing "
+ "digit, a dot or underscore. Got: '{}'.\n"
+ " See: {}#the-yaml-definition-file"
+ ).format(name, BASE_DOC_URL)
+ ).handle_later()
+
+ check_name(category_name, "Category", r"\.")
+ check_name(user_interaction_name, "UserInteraction", r"_")
+
+ def validate_types(self, definition):
+ """This function performs some basic sanity checks on the UserInteraction
+ definition:
+ - Checks that all the required fields are available.
+ - Checks that all the fields have the expected types.
+
+ :param definition: the dictionary containing the UserInteraction
+ properties.
+ :raises ParserError: if a UserInteraction definition field is of the
+ wrong type.
+ :raises ParserError: if a required field is missing or unknown fields are present.
+ """
+
+ # The required and optional fields in a UserInteraction definition.
+ REQUIRED_FIELDS = {
+ "bug_numbers": list, # This contains ints. See LIST_FIELDS_CONTENT.
+ "description": str,
+ }
+
+ # The types for the data within the fields that hold lists.
+ LIST_FIELDS_CONTENT = {
+ "bug_numbers": int,
+ }
+
+ ALL_FIELDS = REQUIRED_FIELDS.copy()
+
+ # Checks that all the required fields are available.
+ missing_fields = [f for f in REQUIRED_FIELDS.keys() if f not in definition]
+ if len(missing_fields) > 0:
+ ParserError(
+ self._name
+ + " - missing required fields: "
+ + ", ".join(missing_fields)
+ + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+ ).handle_later()
+
+ # Do we have any unknown field?
+ unknown_fields = [f for f in definition.keys() if f not in ALL_FIELDS]
+ if len(unknown_fields) > 0:
+ ParserError(
+ self._name
+ + " - unknown fields: "
+ + ", ".join(unknown_fields)
+ + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+ ).handle_later()
+
+ # Checks the type for all the fields.
+ wrong_type_names = [
+ "{} must be {}".format(f, str(ALL_FIELDS[f]))
+ for f in definition.keys()
+ if not isinstance(definition[f], ALL_FIELDS[f])
+ ]
+ if len(wrong_type_names) > 0:
+ ParserError(
+ self._name
+ + " - "
+ + ", ".join(wrong_type_names)
+ + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+ ).handle_later()
+
+ # Check that the lists are not empty and that data in the lists
+ # have the correct types.
+ list_fields = [f for f in definition if isinstance(definition[f], list)]
+ for field in list_fields:
+ # Check for empty lists.
+ if len(definition[field]) == 0:
+ ParserError(
+ (
+ "Field '{}' for probe '{}' must not be empty"
+ + ".\nSee: {}#required-fields)"
+ ).format(field, self._name, BASE_DOC_URL)
+ ).handle_later()
+ # Check the type of the list content.
+ broken_types = [
+ not isinstance(v, LIST_FIELDS_CONTENT[field]) for v in definition[field]
+ ]
+ if any(broken_types):
+ ParserError(
+ (
+ "Field '{}' for probe '{}' must only contain values of type {}"
+ ".\nSee: {}#the-yaml-definition-file)"
+ ).format(
+ field,
+ self._name,
+ str(LIST_FIELDS_CONTENT[field]),
+ BASE_DOC_URL,
+ )
+ ).handle_later()
+
+ @property
+ def category(self):
+ """Get the category name"""
+ return self._category_name
+
+ @property
+ def name(self):
+ """Get the UserInteraction name"""
+ return self._name
+
+ @property
+ def label(self):
+ """Get the UserInteraction label generated from the UserInteraction
+ and category names.
+ """
+ return self._category_name + "." + self._name
+
+ @property
+ def bug_numbers(self):
+ """Get the list of related bug numbers"""
+ return self._definition["bug_numbers"]
+
+ @property
+ def description(self):
+ """Get the UserInteraction description"""
+ return self._definition["description"]
+
+
+def load_user_interactions(filename):
+ """Parses a YAML file containing the UserInteraction definition.
+
+ :param filename: the YAML file containing the UserInteraction definition.
+ :raises ParserError: if the UserInteraction file cannot be opened or
+ parsed.
+ """
+
+ # Parse the UserInteraction definitions from the YAML file.
+ user_interactions = None
+ try:
+ with io.open(filename, "r", encoding="utf-8") as f:
+ user_interactions = yaml.safe_load(f)
+ except IOError as e:
+ ParserError("Error opening " + filename + ": " + str(e)).handle_now()
+ except ValueError as e:
+ ParserError(
+ "Error parsing UserInteractions in {}: {}"
+ ".\nSee: {}".format(filename, e, BASE_DOC_URL)
+ ).handle_now()
+
+ user_interaction_list = []
+
+ # UserInteractions are defined in a fixed two-level hierarchy within the
+ # definition file. The first level contains the category name, while the
+ # second level contains the UserInteraction name
+ # (e.g. "category.name: user.interaction: ...").
+ for category_name in sorted(user_interactions):
+ category = user_interactions[category_name]
+
+ # Make sure that the category has at least one UserInteraction in it.
+ if not category:
+ ParserError(
+ 'Category "{}" must have at least one UserInteraction in it'
+ ".\nSee: {}".format(category_name, BASE_DOC_URL)
+ ).handle_later()
+
+ for user_interaction_name in sorted(category):
+ # We found a UserInteraction type. Go ahead and parse it.
+ user_interaction_info = category[user_interaction_name]
+ user_interaction_list.append(
+ UserInteractionType(
+ category_name, user_interaction_name, user_interaction_info
+ )
+ )
+
+ return user_interaction_list
+
+
+def from_files(filenames):
+ all_user_interactions = []
+
+ for filename in filenames:
+ all_user_interactions += load_user_interactions(filename)
+
+ for user_interaction in all_user_interactions:
+ yield user_interaction
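+
+# Hypothetical usage, assuming a registry file name:
+#   for user_interaction in from_files(["UserInteractions.yaml"]):
+#       print(user_interaction.label)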
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/shared_telemetry_utils.py b/toolkit/components/telemetry/build_scripts/mozparsers/shared_telemetry_utils.py
new file mode 100644
index 0000000000..4b4cc9f685
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/shared_telemetry_utils.py
@@ -0,0 +1,185 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This file contains utility functions shared by the scalars and the histogram generation
+# scripts.
+
+import os
+import re
+import sys
+
+import yaml
+
+# This is a list of flags that determine which process a measurement is allowed
+# to record from.
+KNOWN_PROCESS_FLAGS = {
+ "all": "All",
+ "all_children": "AllChildren",
+ "main": "Main",
+ "content": "Content",
+ "gpu": "Gpu",
+ "rdd": "Rdd",
+ "socket": "Socket",
+ "utility": "Utility",
+ # Historical Values
+ "all_childs": "AllChildren", # Supporting files from before bug 1363725
+}
+
+GECKOVIEW_STREAMING_PRODUCT = "geckoview_streaming"
+
+SUPPORTED_PRODUCTS = {
+ "firefox": "Firefox",
+ "fennec": "Fennec",
+ GECKOVIEW_STREAMING_PRODUCT: "GeckoviewStreaming",
+ "thunderbird": "Thunderbird",
+ # Historical, deprecated values:
+ # 'geckoview': 'Geckoview',
+}
+
+SUPPORTED_OPERATING_SYSTEMS = [
+ "mac",
+ "linux",
+ "windows",
+ "android",
+ "unix",
+ "all",
+]
+
+# mozinfo identifies Linux, the BSD variants, Solaris and SunOS as "unix",
+# so we group them together as unix-like operating systems.
+UNIX_LIKE_OS = [
+ "unix",
+ "linux",
+ "bsd",
+]
+
+CANONICAL_OPERATING_SYSTEMS = {
+ "darwin": "mac",
+ "linux": "linux",
+ "winnt": "windows",
+ "android": "android",
+ # for simplicity we treat all BSD and Solaris systems as unix
+ "gnu/kfreebsd": "unix",
+ "sunos": "unix",
+ "dragonfly": "unix",
+ "freeunix": "unix",
+ "netunix": "unix",
+ "openunix": "unix",
+}
+
+PROCESS_ENUM_PREFIX = "mozilla::Telemetry::Common::RecordedProcessType::"
+PRODUCT_ENUM_PREFIX = "mozilla::Telemetry::Common::SupportedProduct::"
+
+
+class ParserError(Exception):
+ """Thrown by different probe parsers. Errors are partitioned into
+ 'immediately fatal' and 'eventually fatal' so that the parser can print
+ multiple error messages at a time. See bug 1401612."""
+
+ eventual_errors = []
+
+ def __init__(self, *args):
+ Exception.__init__(self, *args)
+
+ def handle_later(self):
+ ParserError.eventual_errors.append(self)
+
+ def handle_now(self):
+ ParserError.print_eventuals()
+ print(str(self), file=sys.stderr)
+ sys.stderr.flush()
+ os._exit(1)
+
+ @classmethod
+ def print_eventuals(cls):
+ while cls.eventual_errors:
+ print(str(cls.eventual_errors.pop(0)), file=sys.stderr)
+
+ @classmethod
+ def exit_func(cls):
+ if cls.eventual_errors:
+ cls("Some errors occurred").handle_now()
+
+
+def is_valid_process_name(name):
+ return name in KNOWN_PROCESS_FLAGS
+
+
+def process_name_to_enum(name):
+ return PROCESS_ENUM_PREFIX + KNOWN_PROCESS_FLAGS.get(name)
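+
+# For example, process_name_to_enum("content") yields
+# "mozilla::Telemetry::Common::RecordedProcessType::Content"; callers
+# presumably validate with is_valid_process_name() first, since unknown
+# names are not handled here.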
+
+
+def is_valid_product(name):
+ return name in SUPPORTED_PRODUCTS
+
+
+def is_geckoview_streaming_product(name):
+ return name == GECKOVIEW_STREAMING_PRODUCT
+
+
+def is_valid_os(name):
+ return name in SUPPORTED_OPERATING_SYSTEMS
+
+
+def canonical_os(os):
+ """Translate possible OS_TARGET names to their canonical value."""
+
+ return CANONICAL_OPERATING_SYSTEMS.get(os.lower()) or "unknown"
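+
+# For example, canonical_os("Darwin") returns "mac", while an unmapped value
+# such as "haiku" falls back to "unknown".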
+
+
+def product_name_to_enum(product):
+ if not is_valid_product(product):
+ raise ParserError("Invalid product {}".format(product))
+ return PRODUCT_ENUM_PREFIX + SUPPORTED_PRODUCTS.get(product)
+
+
+def static_assert(output, expression, message):
+ """Writes a C++ compile-time assertion expression to a file.
+ :param output: the output stream.
+ :param expression: the expression to check.
+ :param message: the string literal that will appear if the expression evaluates to
+ false.
+ """
+ print('static_assert(%s, "%s");' % (expression, message), file=output)
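+
+# For example, static_assert(output, "X > 0", "X must be positive") writes:
+#   static_assert(X > 0, "X must be positive");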
+
+
+def validate_expiration_version(expiration):
+ """Makes sure the expiration version has the expected format.
+
+ Allowed examples: "10", "20", "60", "never"
+ Disallowed examples: "Never", "asd", "4000000", "60a1", "30.5a1"
+
+ :param expiration: the expiration version string.
+ :return: True if the expiration validates correctly, False otherwise.
+ """
+ if expiration != "never" and not re.match(r"^\d{1,3}$", expiration):
+ return False
+
+ return True
+
+
+def add_expiration_postfix(expiration):
+ """Formats the expiration version and adds a version postfix if needed.
+
+ :param expiration: the expiration version string.
+ :return: the modified expiration string.
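+
+ Examples (derived from the regexes below): "60" -> "60.0a1",
+ "60.0" -> "60.0a1", "never" -> "never".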
+ """
+ if re.match(r"^[1-9][0-9]*$", expiration):
+ return expiration + ".0a1"
+
+ if re.match(r"^[1-9][0-9]*\.0$", expiration):
+ return expiration + "a1"
+
+ return expiration
+
+
+def load_yaml_file(filename):
+ """Load a YAML file from disk, throw a ParserError on failure."""
+ try:
+ with open(filename, "r") as f:
+ return yaml.safe_load(f)
+ except IOError as e:
+ raise ParserError("Error opening " + filename + ": " + str(e))
+ except ValueError as e:
+ raise ParserError("Error parsing processes in {}: {}".format(filename, e))
diff --git a/toolkit/components/telemetry/build_scripts/run_glean_parser.py b/toolkit/components/telemetry/build_scripts/run_glean_parser.py
new file mode 100644
index 0000000000..e71206e9b0
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/run_glean_parser.py
@@ -0,0 +1,17 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import sys
+from pathlib import Path
+
+from glean_parser import lint
+
+
+def main(output, *filenames):
+ if lint.glinter([Path(x) for x in filenames], {"allow_reserved": False}):
+ sys.exit(1)
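+
+# Note: the output stream is accepted (moz.build generation scripts receive
+# one as the first argument) but nothing is written to it here; the nonzero
+# exit status is what surfaces glinter failures to the build.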
+
+
+if __name__ == "__main__":
+ main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/setup.py b/toolkit/components/telemetry/build_scripts/setup.py
new file mode 100644
index 0000000000..bd8967aec5
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/setup.py
@@ -0,0 +1,32 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from setuptools import find_packages, setup
+
+VERSION = "1.0.0"
+
+with open("README.md", "r") as fh:
+ long_description = fh.read()
+
+setup(
+ author="Mozilla Telemetry Team",
+ author_email="telemetry-client-dev@mozilla.com",
+ url=(
+ "https://firefox-source-docs.mozilla.org/"
+ "toolkit/components/telemetry/telemetry/collection/index.html"
+ ),
+ name="mozparsers",
+ description="Shared parsers for the Telemetry probe regitries.",
+ long_description=long_description,
+ long_description_content_type="text/markdown",
+ license="MPL 2.0",
+ packages=find_packages(),
+ version=VERSION,
+ classifiers=[
+ "Topic :: Software Development :: Build Tools",
+ "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
+ "Programming Language :: Python :: 2.7",
+ ],
+ keywords=["mozilla", "telemetry", "parsers"],
+)