From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Fri, 19 Apr 2024 02:47:55 +0200
Subject: Adding upstream version 124.0.1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 .../components/telemetry/build_scripts/README.md   |   5 +
 .../telemetry/build_scripts/gen_event_data.py      | 227 ++++++
 .../telemetry/build_scripts/gen_event_enum.py      |  81 ++
 .../telemetry/build_scripts/gen_histogram_data.py  | 297 ++++++++
 .../telemetry/build_scripts/gen_histogram_enum.py  |  94 +++
 .../telemetry/build_scripts/gen_histogram_phf.py   |  73 ++
 .../telemetry/build_scripts/gen_process_data.py    |  80 ++
 .../telemetry/build_scripts/gen_process_enum.py    |  69 ++
 .../telemetry/build_scripts/gen_scalar_data.py     | 216 ++++++
 .../telemetry/build_scripts/gen_scalar_enum.py     |  60 ++
 .../build_scripts/gen_userinteraction_data.py      | 105 +++
 .../build_scripts/gen_userinteraction_phf.py       |  70 ++
 .../telemetry/build_scripts/mozparsers/__init__.py |   3 +
 .../build_scripts/mozparsers/parse_events.py       | 477 ++++++++++++
 .../build_scripts/mozparsers/parse_histograms.py   | 836 +++++++++++++++++++++
 .../build_scripts/mozparsers/parse_scalars.py      | 503 +++++++++++++
 .../mozparsers/parse_user_interactions.py          | 256 +++++++
 .../mozparsers/shared_telemetry_utils.py           | 185 +++++
 .../telemetry/build_scripts/run_glean_parser.py    |  17 +
 .../components/telemetry/build_scripts/setup.py    |  32 +
 20 files changed, 3686 insertions(+)
 create mode 100644 toolkit/components/telemetry/build_scripts/README.md
 create mode 100644 toolkit/components/telemetry/build_scripts/gen_event_data.py
 create mode 100644 toolkit/components/telemetry/build_scripts/gen_event_enum.py
 create mode 100644 toolkit/components/telemetry/build_scripts/gen_histogram_data.py
 create mode 100644 toolkit/components/telemetry/build_scripts/gen_histogram_enum.py
 create mode 100644 toolkit/components/telemetry/build_scripts/gen_histogram_phf.py
 create mode 100644 toolkit/components/telemetry/build_scripts/gen_process_data.py
 create mode 100644 toolkit/components/telemetry/build_scripts/gen_process_enum.py
 create mode 100644 toolkit/components/telemetry/build_scripts/gen_scalar_data.py
 create mode 100644 toolkit/components/telemetry/build_scripts/gen_scalar_enum.py
 create mode 100644 toolkit/components/telemetry/build_scripts/gen_userinteraction_data.py
 create mode 100644 toolkit/components/telemetry/build_scripts/gen_userinteraction_phf.py
 create mode 100644 toolkit/components/telemetry/build_scripts/mozparsers/__init__.py
 create mode 100644 toolkit/components/telemetry/build_scripts/mozparsers/parse_events.py
 create mode 100644 toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py
 create mode 100644 toolkit/components/telemetry/build_scripts/mozparsers/parse_scalars.py
 create mode 100644 toolkit/components/telemetry/build_scripts/mozparsers/parse_user_interactions.py
 create mode 100644 toolkit/components/telemetry/build_scripts/mozparsers/shared_telemetry_utils.py
 create mode 100644 toolkit/components/telemetry/build_scripts/run_glean_parser.py
 create mode 100644 toolkit/components/telemetry/build_scripts/setup.py

(limited to 'toolkit/components/telemetry/build_scripts')

diff --git a/toolkit/components/telemetry/build_scripts/README.md b/toolkit/components/telemetry/build_scripts/README.md
new file mode 100644
index 0000000000..4823580735
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/README.md
@@ -0,0 +1,5 @@
+# Telemetry Registries Parsers
+This package exports the parsers for Mozilla's probe registries. These registry files contain the definitions for the different probes (i.e. [scalars](https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/telemetry/collection/scalars.html), [histograms](https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/telemetry/collection/histograms.html) and [events](https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/telemetry/collection/events.html)) that can be used to collect data.
+
+# License
+Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/toolkit/components/telemetry/build_scripts/gen_event_data.py b/toolkit/components/telemetry/build_scripts/gen_event_data.py
new file mode 100644
index 0000000000..2e321cea72
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_event_data.py
@@ -0,0 +1,227 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out event information for C++. The events are defined
+# in a file provided as a command-line argument.
+
+import itertools
+import json
+import sys
+from collections import OrderedDict
+from os import path
+
+from mozparsers import parse_events
+from mozparsers.shared_telemetry_utils import ParserError, static_assert
+
+COMPONENTS_PATH = path.abspath(
+    path.join(path.dirname(__file__), path.pardir, path.pardir)
+)
+sys.path.append(
+    path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext")
+)
+from string_table import StringTable
+
+# The banner/text at the top of the generated file.
+banner = """/* This file is auto-generated, only for internal use in TelemetryEvent.h,
+   see gen_event_data.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryEventData_h
+#define mozilla_TelemetryEventData_h
+#include "core/EventInfo.h"
+#include "nsITelemetry.h"
+namespace {
+"""
+
+file_footer = """\
+} // namespace
+#endif // mozilla_TelemetryEventData_h
+"""
+
+
+def write_extra_table(events, output, string_table):
+    table_name = "gExtraKeysTable"
+    extra_table = []  # one (first index, key count) tuple per event, in order
+    extra_count = 0  # number of key indexes written to the table so far
+    # Emit gExtraKeysTable: the string-table indexes of each event's extra keys.
+    print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+    print("const uint32_t %s[] = {" % table_name, file=output)
+    print("#else", file=output)
+    print("constexpr uint32_t %s[] = {" % table_name, file=output)
+    print("#endif", file=output)
+
+    for e in events:
+        extra_index = 0  # stays 0 (with a count of 0) when there are no keys
+        extra_keys = e.extra_keys
+        if len(extra_keys) > 0:
+            extra_index = extra_count
+            extra_count += len(extra_keys)
+            indexes = string_table.stringIndexes(extra_keys)
+
+            print(
+                "  // %s, [%s], [%s]"
+                % (e.category, ", ".join(e.methods), ", ".join(e.objects)),
+                file=output,
+            )
+            print("  // extra_keys: %s" % ", ".join(extra_keys), file=output)
+            print("  %s," % ", ".join(map(str, indexes)), file=output)
+
+        extra_table.append((extra_index, len(extra_keys)))
+
+    print("};", file=output)
+    static_assert(output, "sizeof(%s) <= UINT32_MAX" % table_name, "index overflow")
+
+    return extra_table
+
+
+def write_common_event_table(events, output, string_table, extra_table):
+    table_name = "gCommonEventInfo"
+    # Emit gCommonEventInfo: one entry per event definition, in input order.
+    print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+    print("const CommonEventInfo %s[] = {" % table_name, file=output)
+    print("#else", file=output)
+    print("constexpr CommonEventInfo %s[] = {" % table_name, file=output)
+    print("#endif", file=output)
+
+    for e, extras in zip(events, extra_table):  # extras: (index, count)
+        # Write a comment to make the file human-readable.
+        print("  // category: %s" % e.category, file=output)
+        print("  // methods: [%s]" % ", ".join(e.methods), file=output)
+        print("  // objects: [%s]" % ", ".join(e.objects), file=output)
+
+        # Write the common info structure; the first two fields are string indexes.
+        print(
+            "  {%d, %d, %d, %d, %s, %s, %s },"
+            % (
+                string_table.stringIndex(e.category),
+                string_table.stringIndex(e.expiry_version),
+                extras[0],  # extra keys index
+                extras[1],  # extra keys count
+                e.dataset,
+                " | ".join(e.record_in_processes_enum),
+                " | ".join(e.products_enum),
+            ),
+            file=output,
+        )
+
+    print("};", file=output)
+    static_assert(output, "sizeof(%s) <= UINT32_MAX" % table_name, "index overflow")
+
+
+def write_event_table(events, output, string_table):
+    table_name = "gEventInfo"
+    # Emit gEventInfo: one entry per (method, object) pair of every event.
+    print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+    print("const EventInfo %s[] = {" % table_name, file=output)
+    print("#else", file=output)
+    print("constexpr EventInfo %s[] = {" % table_name, file=output)
+    print("#endif", file=output)
+
+    for common_info_index, e in enumerate(events):  # index into gCommonEventInfo
+        for method_name, object_name in itertools.product(e.methods, e.objects):
+            print(
+                "  // category: %s, method: %s, object: %s"
+                % (e.category, method_name, object_name),
+                file=output,
+            )
+
+            print(
+                "  {gCommonEventInfo[%d], %d, %d},"
+                % (
+                    common_info_index,
+                    string_table.stringIndex(method_name),
+                    string_table.stringIndex(object_name),
+                ),
+                file=output,
+            )
+
+    print("};", file=output)
+    static_assert(output, "sizeof(%s) <= UINT32_MAX" % table_name, "index overflow")
+
+
+def generate_JSON_definitions(output, *filenames):
+    """Write the event definitions to a JSON file.
+
+    :param output: the file to write the content to.
+    :param filenames: a list of filenames provided by the build system.
+           Events from all of the files are merged.
+    """
+    # Load the event data.
+    events = []
+    for filename in filenames:
+        try:
+            batch = parse_events.load_events(filename, True)
+            events.extend(batch)
+        except ParserError as ex:
+            print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+            sys.exit(1)
+
+    event_definitions = OrderedDict()
+    for event in events:
+        category = event.category
+
+        if category not in event_definitions:
+            event_definitions[category] = OrderedDict()
+
+        event_definitions[category][event.name] = OrderedDict(
+            {
+                "methods": event.methods,
+                "objects": event.objects,
+                "extra_keys": event.extra_keys,
+                "record_on_release": True
+                if event.dataset_short == "opt-out"
+                else False,
+                # We don't expire dynamic-builtin events: they're only meant for
+                # use in local developer builds anyway. They will expire when rebuilding.
+                "expires": event.expiry_version,
+                "expired": False,
+                "products": event.products,
+            }
+        )
+
+    json.dump(event_definitions, output, sort_keys=True)
+
+
+def main(output, *filenames):
+    # Load the event data.
+    events = []
+    for filename in filenames:
+        try:
+            batch = parse_events.load_events(filename, True)
+            events.extend(batch)
+        except ParserError as ex:
+            print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+            sys.exit(1)
+
+    # Write the event data file.
+    print(banner, file=output)
+    print(file_header, file=output)
+
+    # Write the extra keys table.
+    string_table = StringTable()  # shared by every table below, emitted last
+    extra_table = write_extra_table(events, output, string_table)
+    print("", file=output)
+
+    # Write a table with the common event data.
+    write_common_event_table(events, output, string_table, extra_table)
+    print("", file=output)
+
+    # Write the data for individual events.
+    write_event_table(events, output, string_table)
+    print("", file=output)
+
+    # Write the string table.
+    string_table_name = "gEventsStringTable"
+    string_table.writeDefinition(output, string_table_name)
+    static_assert(
+        output, "sizeof(%s) <= UINT32_MAX" % string_table_name, "index overflow"
+    )
+    print("", file=output)
+
+    print(file_footer, file=output)
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_event_enum.py b/toolkit/components/telemetry/build_scripts/gen_event_enum.py
new file mode 100644
index 0000000000..9dd418b3dd
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_event_enum.py
@@ -0,0 +1,81 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out C++ enum definitions that represent the different event types.
+#
+# The events are defined in files provided as command-line arguments.
+
+import sys
+
+import buildconfig
+from mozparsers import parse_events
+from mozparsers.shared_telemetry_utils import ParserError
+
+banner = """/* This file is auto-generated, see gen_event_enum.py.  */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryEventEnums_h
+#define mozilla_TelemetryEventEnums_h
+
+#include <stdint.h>
+
+namespace mozilla {
+namespace Telemetry {
+enum class EventID : uint32_t {\
+"""
+
+file_footer = """\
+};
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryEventEnums_h
+"""  # closes the enum, then the namespaces innermost-first, then the guard
+
+
+def main(output, *filenames):
+    # Load the events first.
+    events = []
+    for filename in filenames:
+        try:
+            batch = parse_events.load_events(filename, True)
+            events.extend(batch)
+        except ParserError as ex:
+            print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+            sys.exit(1)
+    # Number every enum label up front; the OS filter below only skips printing.
+    grouped = dict()  # category -> [(first enum value, event), ...]
+    index = 0  # next unassigned enum value
+    for e in events:
+        category = e.category
+        if category not in grouped:
+            grouped[category] = []
+        grouped[category].append((index, e))
+        index += len(e.enum_labels)
+
+    # Write the enum file.
+    print(banner, file=output)
+    print(file_header, file=output)
+
+    for category, indexed in sorted(grouped.items()):
+        category_cpp = indexed[0][1].category_cpp
+
+        print("  // category: %s" % category, file=output)
+
+        for event_index, e in indexed:
+            if e.record_on_os(buildconfig.substs["OS_TARGET"]):
+                for offset, label in enumerate(e.enum_labels):
+                    print(
+                        " %s_%s = %d," % (category_cpp, label, event_index + offset),
+                        file=output,
+                    )
+
+    print("  // meta", file=output)
+    print("  EventCount = %d," % index, file=output)
+
+    print(file_footer, file=output)
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_histogram_data.py b/toolkit/components/telemetry/build_scripts/gen_histogram_data.py
new file mode 100644
index 0000000000..a203dde9f9
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_histogram_data.py
@@ -0,0 +1,297 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out histogram information for C++.  The histograms are defined
+# in a file provided as a command-line argument.
+
+import sys
+from os import path
+
+import buildconfig
+from mozparsers import parse_histograms
+from mozparsers.shared_telemetry_utils import ParserError, static_assert
+
+COMPONENTS_PATH = path.abspath(
+    path.join(path.dirname(__file__), path.pardir, path.pardir)
+)
+sys.path.append(
+    path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext")
+)
+from string_table import StringTable
+
+banner = """/* This file is auto-generated, see gen_histogram_data.py.  */
+"""
+
+
+def print_array_entry(
+    output,
+    histogram,
+    name_index,
+    exp_index,
+    label_index,
+    label_count,
+    key_index,
+    key_count,
+    store_index,
+    store_count,
+):
+    if histogram.record_on_os(buildconfig.substs["OS_TARGET"]):  # else: no entry
+        print(
+            "  { %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %s, %s, %s, %s, %s, %s },"
+            % (
+                histogram.low(),
+                histogram.high(),
+                histogram.n_buckets(),
+                name_index,
+                exp_index,
+                label_count,
+                key_count,
+                store_count,
+                label_index,
+                key_index,
+                store_index,  # %s: an int offset or the literal "UINT16_MAX"
+                " | ".join(histogram.record_in_processes_enum()),
+                "true" if histogram.keyed() else "false",
+                histogram.nsITelemetry_kind(),
+                histogram.dataset(),
+                " | ".join(histogram.products_enum()),
+            ),
+            file=output,
+        )
+
+
+def write_histogram_table(output, histograms):
+    string_table = StringTable()
+    # Emit gHistogramInfos, then the string, label, key and store tables.
+    label_table = []  # (histogram name, label string indexes)
+    label_count = 0  # labels collected so far; the next histogram's offset
+    keys_table = []  # (histogram name, key string indexes)
+    keys_count = 0  # keys collected so far; the next histogram's offset
+    store_table = []  # (histogram name, store string indexes)
+    total_store_count = 0  # stores collected so far; the next offset
+
+    print("constexpr HistogramInfo gHistogramInfos[] = {", file=output)
+    for histogram in histograms:
+        name_index = string_table.stringIndex(histogram.name())
+        exp_index = string_table.stringIndex(histogram.expiration())
+
+        labels = histogram.labels()
+        label_index = 0
+        if len(labels) > 0:
+            label_index = label_count
+            label_table.append((histogram.name(), string_table.stringIndexes(labels)))
+            label_count += len(labels)
+
+        keys = histogram.keys()
+        key_index = 0
+        if len(keys) > 0:
+            key_index = keys_count
+            keys_table.append((histogram.name(), string_table.stringIndexes(keys)))
+            keys_count += len(keys)
+
+        stores = histogram.record_into_store()
+        store_index = 0
+        if stores == ["main"]:
+            # if count == 1 && offset == UINT16_MAX -> only main store
+            store_index = "UINT16_MAX"  # written verbatim as the C++ constant
+        else:
+            store_index = total_store_count
+            store_table.append((histogram.name(), string_table.stringIndexes(stores)))
+            total_store_count += len(stores)
+        # print_array_entry skips other-OS histograms; the side tables do not.
+        print_array_entry(
+            output,
+            histogram,
+            name_index,
+            exp_index,
+            label_index,
+            len(labels),
+            key_index,
+            len(keys),
+            store_index,
+            len(stores),
+        )
+    print("};\n", file=output)
+
+    strtab_name = "gHistogramStringTable"
+    string_table.writeDefinition(output, strtab_name)
+    static_assert(output, "sizeof(%s) <= UINT32_MAX" % strtab_name, "index overflow")
+
+    print("\n#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+    print("const uint32_t gHistogramLabelTable[] = {", file=output)
+    print("#else", file=output)
+    print("constexpr uint32_t gHistogramLabelTable[] = {", file=output)
+    print("#endif", file=output)
+    for name, indexes in label_table:
+        print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output)
+    print("};", file=output)
+    static_assert(
+        output, "sizeof(gHistogramLabelTable) <= UINT16_MAX", "index overflow"
+    )
+
+    print("\n#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+    print("const uint32_t gHistogramKeyTable[] = {", file=output)
+    print("#else", file=output)
+    print("constexpr uint32_t gHistogramKeyTable[] = {", file=output)
+    print("#endif", file=output)
+    for name, indexes in keys_table:
+        print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output)
+    print("};", file=output)
+    static_assert(output, "sizeof(gHistogramKeyTable) <= UINT16_MAX", "index overflow")
+
+    store_table_name = "gHistogramStoresTable"
+    print("\n#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+    print("const uint32_t {}[] = {{".format(store_table_name), file=output)
+    print("#else", file=output)
+    print("constexpr uint32_t {}[] = {{".format(store_table_name), file=output)
+    print("#endif", file=output)
+    for name, indexes in store_table:
+        print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output)
+    print("};", file=output)
+    static_assert(
+        output, "sizeof(%s) <= UINT16_MAX" % store_table_name, "index overflow"
+    )
+
+
+# Write out static asserts for histogram data.  We'd prefer to perform
+# these checks in this script itself, but since several histograms
+# (generally enumerated histograms) use compile-time constants for
+# their upper bounds, we have to let the compiler do the checking.
+
+
+def static_asserts_for_boolean(output, histogram):
+    """Boolean histograms get no compile-time checks; nothing is written."""
+
+
+def static_asserts_for_flag(output, histogram):
+    """Flag histograms get no compile-time checks; nothing is written."""
+
+
+def static_asserts_for_count(output, histogram):
+    """Count histograms get no compile-time checks; nothing is written."""
+
+
+def static_asserts_for_enumerated(output, histogram):
+    """Emit a static_assert that the histogram's high() value exceeds 2."""
+    condition = "%s > 2" % histogram.high()
+    message = "Not enough values for %s" % histogram.name()
+    static_assert(output, condition, message)
+
+
+def shared_static_asserts(output, histogram):
+    """Emit the range checks used by both linear and exponential histograms."""
+    name = histogram.name()
+    low, high = histogram.low(), histogram.high()
+    buckets = histogram.n_buckets()
+    checks = (
+        ("%s < %s" % (low, high), "low >= high for %s"),
+        ("%s > 2" % buckets, "Not enough values for %s"),
+        ("%s >= 1" % low, "Incorrect low value for %s"),
+        ("%s > %s" % (high, buckets), "high must be > number of buckets for %s;"
+         " you may want an enumerated histogram"),
+    )
+    for condition, message in checks:
+        static_assert(output, condition, message % name)
+
+
+def static_asserts_for_linear(output, histogram):
+    shared_static_asserts(output, histogram)  # linear adds no checks of its own
+
+
+def static_asserts_for_exponential(output, histogram):
+    shared_static_asserts(output, histogram)  # exponential adds no checks of its own
+
+
+def write_histogram_static_asserts(output, histograms):
+    print(
+        """
+// Perform the checks at the beginning of HistogramGet at
+// compile time, so that incorrect histogram definitions
+// give compile-time errors, not runtime errors.""",
+        file=output,
+    )
+    # Dispatch on histogram kind; kinds mapped to no-op helpers get no asserts.
+    table = {
+        "boolean": static_asserts_for_boolean,
+        "flag": static_asserts_for_flag,
+        "count": static_asserts_for_count,
+        "enumerated": static_asserts_for_enumerated,
+        "categorical": static_asserts_for_enumerated,
+        "linear": static_asserts_for_linear,
+        "exponential": static_asserts_for_exponential,
+    }
+
+    target_os = buildconfig.substs["OS_TARGET"]
+    for histogram in histograms:
+        kind = histogram.kind()
+        if not histogram.record_on_os(target_os):
+            continue  # not recorded on this OS: no asserts, kind is not checked
+
+        if kind not in table:
+            raise Exception(
+                'Unknown kind "%s" for histogram "%s".' % (kind, histogram.name())
+            )
+        fn = table[kind]
+        fn(output, histogram)
+
+
+def write_histogram_ranges(output, histograms):
+    # This generates static data to avoid costly initialization of histograms
+    # (especially exponential ones which require log and exp calls) at runtime.
+    # The format must exactly match that required in histogram.cc, which is
+    # 0, buckets..., INT_MAX. Additionally, the list ends in a 0 to aid asserts
+    # that validate that the length of the ranges list is correct.
+    print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+    print("const int gHistogramBucketLowerBounds[] = {", file=output)
+    print("#else", file=output)
+    print("constexpr int gHistogramBucketLowerBounds[] = {", file=output)
+    print("#endif", file=output)
+
+    # Print the dummy buckets for expired histograms, and set the offset to match.
+    print("0,1,2,INT_MAX,", file=output)
+    offset = 4  # the four dummy entries printed above
+    ranges_offsets = {}  # ranges tuple -> offset; identical ranges are shared
+
+    for histogram in histograms:
+        ranges = tuple(histogram.ranges())
+        if ranges not in ranges_offsets:
+            ranges_offsets[ranges] = offset
+            # Suffix each ranges listing with INT_MAX, to match histogram.cc's
+            # expected format.
+            offset += len(ranges) + 1
+            print(",".join(map(str, ranges)), ",INT_MAX,", file=output)
+    print("0};", file=output)
+
+    if offset > 32767:
+        raise Exception("Histogram offsets exceeded maximum value for an int16_t.")
+
+    target_os = buildconfig.substs["OS_TARGET"]
+    print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+    print("const int16_t gHistogramBucketLowerBoundIndex[] = {", file=output)
+    print("#else", file=output)
+    print("constexpr int16_t gHistogramBucketLowerBoundIndex[] = {", file=output)
+    print("#endif", file=output)
+    for histogram in histograms:
+        if histogram.record_on_os(target_os):
+            our_offset = ranges_offsets[tuple(histogram.ranges())]
+            print("%d," % our_offset, file=output)
+
+    print("};", file=output)
+
+
+def main(output, *filenames):
+    try:
+        histograms = list(parse_histograms.from_files(filenames))
+    except ParserError as ex:  # diagnostics go to stderr, never into `output`
+        print("\nError processing histograms:\n" + str(ex) + "\n", file=sys.stderr)
+        sys.exit(1)
+    # Emit, in order: banner, histogram tables, bucket ranges, static asserts.
+    print(banner, file=output)
+    write_histogram_table(output, histograms)
+    write_histogram_ranges(output, histograms)
+    write_histogram_static_asserts(output, histograms)
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_histogram_enum.py b/toolkit/components/telemetry/build_scripts/gen_histogram_enum.py
new file mode 100644
index 0000000000..8d83e760c5
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_histogram_enum.py
@@ -0,0 +1,94 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out a C++ enum definition whose members are the names of
+# histograms as well as the following other members:
+#
+#   - HistogramCount
+#
+# The histograms are defined in files provided as command-line arguments.
+
+import sys
+
+import buildconfig
+from mozparsers import parse_histograms
+from mozparsers.shared_telemetry_utils import ParserError
+
+banner = """/* This file is auto-generated, see gen_histogram_enum.py.  */
+"""
+
+header = """
+#ifndef mozilla_TelemetryHistogramEnums_h
+#define mozilla_TelemetryHistogramEnums_h
+
+#include <cstdint>
+#include <type_traits>
+
+namespace mozilla {
+namespace Telemetry {
+"""
+
+footer = """
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryHistogramEnums_h"""
+
+
+def main(output, *filenames):
+    # Print header.
+    print(banner, file=output)
+    print(header, file=output)
+
+    # Load the histograms; errors go to stderr, not into the generated header.
+    try:
+        all_histograms = list(parse_histograms.from_files(filenames))
+    except ParserError as ex:
+        print("\nError processing histograms:\n" + str(ex) + "\n", file=sys.stderr)
+        sys.exit(1)
+
+    # Print the histogram enums.
+    print("enum HistogramID : uint32_t {", file=output)
+    for histogram in all_histograms:
+        if histogram.record_on_os(buildconfig.substs["OS_TARGET"]):
+            print("  %s," % histogram.name(), file=output)
+
+    print("  HistogramCount,", file=output)
+
+    print("};", file=output)
+
+    # Write categorical label enums.
+    categorical = filter(lambda h: h.kind() == "categorical", all_histograms)
+    categorical = filter(
+        lambda h: h.record_on_os(buildconfig.substs["OS_TARGET"]), categorical
+    )
+    enums = [("LABELS_" + h.name(), h.labels(), h.name()) for h in categorical]
+    for name, labels, _ in enums:
+        print("\nenum class %s : uint32_t {" % name, file=output)
+        print("  %s" % ",\n  ".join(labels), file=output)
+        print("};", file=output)
+
+    print(
+        "\ntemplate<class T> struct IsCategoricalLabelEnum : std::false_type {};",
+        file=output,
+    )
+    for name, _, _ in enums:
+        print(
+            "template<> struct IsCategoricalLabelEnum<%s> : std::true_type {};" % name,
+            file=output,
+        )
+
+    print("\ntemplate<class T> struct CategoricalLabelId {};", file=output)
+    for name, _, histogram_id in enums:  # histogram_id: the HistogramID member name
+        print(
+            "template<> struct CategoricalLabelId<%s> : "
+            "std::integral_constant<uint32_t, %s> {};" % (name, histogram_id),
+            file=output,
+        )
+
+    # Footer.
+    print(footer, file=output)
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_histogram_phf.py b/toolkit/components/telemetry/build_scripts/gen_histogram_phf.py
new file mode 100644
index 0000000000..38c7245506
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_histogram_phf.py
@@ -0,0 +1,73 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from mozparsers.shared_telemetry_utils import ParserError
+from perfecthash import PerfectHash
+
+PHFSIZE = 1024
+
+import sys
+
+import buildconfig
+from mozparsers import parse_histograms
+
+banner = """/* This file is auto-generated, see gen_histogram_phf.py.  */
+"""
+
+header = """
+#ifndef mozilla_TelemetryHistogramNameMap_h
+#define mozilla_TelemetryHistogramNameMap_h
+
+#include "mozilla/PerfectHash.h"
+
+namespace mozilla {
+namespace Telemetry {
+"""
+
+footer = """
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryHistogramNameMap_h
+"""  # closes the namespaces innermost-first, then the include guard
+
+
+def main(output, *filenames):
+    """
+    Generate a Perfect Hash Table for the Histogram name -> Histogram ID lookup.
+    The table is immutable once generated and we can avoid any dynamic memory allocation.
+    """
+
+    output.write(banner)
+    output.write(header)
+    # Parse errors go to stderr so they never end up in the generated header.
+    try:
+        histograms = list(parse_histograms.from_files(filenames))
+        histograms = [
+            h for h in histograms if h.record_on_os(buildconfig.substs["OS_TARGET"])
+        ]
+    except ParserError as ex:
+        print("\nError processing histograms:\n" + str(ex) + "\n", file=sys.stderr)
+        sys.exit(1)
+    # Each ID is the histogram's position in the OS-filtered list built above.
+    histograms = [
+        (bytearray(hist.name(), "ascii"), idx) for (idx, hist) in enumerate(histograms)
+    ]
+    name_phf = PerfectHash(histograms, PHFSIZE)
+
+    output.write(
+        name_phf.cxx_codegen(
+            name="HistogramIDByNameLookup",
+            entry_type="uint32_t",
+            lower_entry=lambda x: str(x[1]),
+            key_type="const nsACString&",
+            key_bytes="aKey.BeginReading()",
+            key_length="aKey.Length()",
+        )
+    )
+
+    output.write(footer)
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_process_data.py b/toolkit/components/telemetry/build_scripts/gen_process_data.py
new file mode 100644
index 0000000000..2a494689ad
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_process_data.py
@@ -0,0 +1,80 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out processes data for C++. The processes are defined
+# in a file provided as a command-line argument.
+
+import collections
+import sys
+
+from mozparsers.shared_telemetry_utils import ParserError, load_yaml_file
+
+# The banner/text at the top of the generated file.
+banner = """/* This file is auto-generated from Telemetry build scripts,
+   see gen_process_data.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryProcessData_h
+#define mozilla_TelemetryProcessData_h
+
+#include "mozilla/TelemetryProcessEnums.h"
+
+namespace mozilla {
+namespace Telemetry {
+"""
+
+file_footer = """
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryProcessData_h"""
+
+
+def to_enum_label(name):
+    return name.title().replace("_", "")
+
+
+def write_processes_data(processes, output):
+    def p(line):
+        print(line, file=output)
+
+    processes = collections.OrderedDict(processes)
+
+    p("static GeckoProcessType ProcessIDToGeckoProcessType[%d] = {" % len(processes))
+    for i, (name, value) in enumerate(sorted(processes.items())):
+        p(
+            "  /* %d: ProcessID::%s = */ %s,"
+            % (i, to_enum_label(name), value["gecko_enum"])
+        )
+    p("};")
+    p("")
+    p("#if defined(_MSC_VER) && !defined(__clang__)")
+    p("static const char* const ProcessIDToString[%d] = {" % len(processes))
+    p("#else")
+    p("static constexpr const char* ProcessIDToString[%d] = {" % len(processes))
+    p("#endif")
+    for i, (name, value) in enumerate(sorted(processes.items())):
+        p('  /* %d: ProcessID::%s = */ "%s",' % (i, to_enum_label(name), name))
+    p("};")
+
+
+def main(output, *filenames):
+    """Write the ProcessID lookup tables for the single definitions file given."""
+    if len(filenames) > 1:
+        raise Exception("We don't support loading from more than one file.")
+    try:
+        processes = load_yaml_file(filenames[0])
+
+        # Write the process data file.
+        print(banner, file=output)
+        print(file_header, file=output)
+        write_processes_data(processes, output)
+        print(file_footer, file=output)
+    except ParserError as ex:
+        print("\nError generating processes data:\n" + str(ex) + "\n", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_process_enum.py b/toolkit/components/telemetry/build_scripts/gen_process_enum.py
new file mode 100644
index 0000000000..bfe2d65e43
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_process_enum.py
@@ -0,0 +1,69 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out the processes enum for C++. The processes are defined
+# in a file provided as a command-line argument.
+
+import collections
+import sys
+
+from mozparsers.shared_telemetry_utils import ParserError, load_yaml_file
+
+# The banner/text at the top of the generated file.
+banner = """/* This file is auto-generated from Telemetry build scripts,
+   see gen_process_enum.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryProcessEnums_h
+#define mozilla_TelemetryProcessEnums_h
+
+#include <cstdint>
+
+namespace mozilla {
+namespace Telemetry {
+"""
+
+file_footer = """
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryProcessEnums_h"""
+
+
+def to_enum_label(name):
+    return name.title().replace("_", "")
+
+
+def write_processes_enum(processes, output):
+    def p(line):
+        print(line, file=output)
+
+    processes = collections.OrderedDict(processes)
+
+    p("enum class ProcessID : uint32_t {")
+    for i, (name, _) in enumerate(sorted(processes.items())):
+        p("  %s = %d," % (to_enum_label(name), i))
+    p("  Count = %d" % len(processes))
+    p("};")
+
+
+def main(output, *filenames):
+    """Write the ProcessID enum header for the single definitions file given."""
+    if len(filenames) > 1:
+        raise Exception("We don't support loading from more than one file.")
+    try:
+        processes = load_yaml_file(filenames[0])
+
+        # Write the process enum file.
+        print(banner, file=output)
+        print(file_header, file=output)
+        write_processes_enum(processes, output)
+        print(file_footer, file=output)
+    except ParserError as ex:
+        print("\nError generating processes enums:\n" + str(ex) + "\n", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_scalar_data.py b/toolkit/components/telemetry/build_scripts/gen_scalar_data.py
new file mode 100644
index 0000000000..6ef1f457b5
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_scalar_data.py
@@ -0,0 +1,216 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out scalar information for C++.  The scalars are defined
+# in a file provided as a command-line argument.
+
+import json
+import sys
+from collections import OrderedDict
+from os import path
+
+import buildconfig
+from mozparsers import parse_scalars
+from mozparsers.shared_telemetry_utils import ParserError, static_assert
+
+COMPONENTS_PATH = path.abspath(
+    path.join(path.dirname(__file__), path.pardir, path.pardir)
+)
+sys.path.append(
+    path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext")
+)
+from string_table import StringTable
+
+# The banner/text at the top of the generated file.
+banner = """/* This file is auto-generated, only for internal use in TelemetryScalar.h,
+   see gen_scalar_data.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryScalarData_h
+#define mozilla_TelemetryScalarData_h
+#include "core/ScalarInfo.h"
+#include "nsITelemetry.h"
+namespace {
+"""
+
+file_footer = """\
+} // namespace
+#endif // mozilla_TelemetryScalarData_h
+"""
+
+
+def write_scalar_info(
+    scalar,
+    output,
+    name_index,
+    expiration_index,
+    store_index,
+    store_count,
+    key_count,
+    key_index,
+):
+    """Writes a scalar entry to the output file.
+
+    :param scalar: a ScalarType instance describing the scalar.
+    :param output: the output stream.
+    :param name_index: the index of the scalar name in the strings table.
+    :param expiration_index: the index of the expiration version in the strings table.
+    """
+    if scalar.record_on_os(buildconfig.substs["OS_TARGET"]):
+        print(
+            "  {{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }},".format(
+                scalar.nsITelemetry_kind,
+                name_index,
+                expiration_index,
+                scalar.dataset,
+                " | ".join(scalar.record_in_processes_enum),
+                "true" if scalar.keyed else "false",
+                key_count,
+                key_index,
+                " | ".join(scalar.products_enum),
+                store_count,
+                store_index,
+            ),
+            file=output,
+        )
+
+
+def write_scalar_tables(scalars, output):
+    """Writes the scalar and strings tables to an header file.
+
+    :param scalars: a list of ScalarType instances describing the scalars.
+    :param output: the output stream.
+    """
+    string_table = StringTable()
+
+    store_table = []
+    total_store_count = 0
+
+    keys_table = []
+    total_key_count = 0
+
+    print("const ScalarInfo gScalars[] = {", file=output)
+    for s in scalars:
+        # We add both the scalar label and the expiration string to the strings
+        # table.
+        name_index = string_table.stringIndex(s.label)
+        exp_index = string_table.stringIndex(s.expires)
+
+        stores = s.record_into_store
+        store_index = 0
+        if stores == ["main"]:
+            # if count == 1 && offset == UINT16_MAX -> only main store
+            store_index = "UINT16_MAX"
+        else:
+            store_index = total_store_count
+            store_table.append((s.label, string_table.stringIndexes(stores)))
+            total_store_count += len(stores)
+
+        keys = s.keys
+        key_index = 0
+        if len(keys) > 0:
+            key_index = total_key_count
+            keys_table.append((s.label, string_table.stringIndexes(keys)))
+            total_key_count += len(keys)
+
+        # Write the scalar info entry.
+        write_scalar_info(
+            s,
+            output,
+            name_index,
+            exp_index,
+            store_index,
+            len(stores),
+            len(keys),
+            key_index,
+        )
+    print("};", file=output)
+
+    string_table_name = "gScalarsStringTable"
+    string_table.writeDefinition(output, string_table_name)
+    static_assert(
+        output, "sizeof(%s) <= UINT32_MAX" % string_table_name, "index overflow"
+    )
+
+    print("\nconstexpr uint32_t gScalarKeysTable[] = {", file=output)
+    for name, indexes in keys_table:
+        print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output)
+    print("};", file=output)
+
+    store_table_name = "gScalarStoresTable"
+    print("\n#if defined(_MSC_VER) && !defined(__clang__)", file=output)
+    print("const uint32_t {}[] = {{".format(store_table_name), file=output)
+    print("#else", file=output)
+    print("constexpr uint32_t {}[] = {{".format(store_table_name), file=output)
+    print("#endif", file=output)
+    for name, indexes in store_table:
+        print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output)
+    print("};", file=output)
+    static_assert(
+        output, "sizeof(%s) <= UINT16_MAX" % store_table_name, "index overflow"
+    )
+
+
+def parse_scalar_definitions(filenames):
+    scalars = []
+    for filename in filenames:
+        try:
+            batch = parse_scalars.load_scalars(filename)
+            scalars.extend(batch)
+        except ParserError as ex:
+            print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+            sys.exit(1)
+    return scalars
+
+
+def generate_JSON_definitions(output, *filenames):
+    """Write the scalar definitions to a JSON file.
+
+    The result maps each category to a mapping of scalar name -> properties.
+
+    :param output: the file to write the content to.
+    :param filenames: a list of filenames provided by the build system.
+           Every listed file is parsed and the scalars are merged in order.
+    """
+    scalars = parse_scalar_definitions(filenames)
+
+    scalar_definitions = OrderedDict()
+    for scalar in scalars:
+        category = scalar.category
+
+        if category not in scalar_definitions:
+            scalar_definitions[category] = OrderedDict()
+
+        scalar_definitions[category][scalar.name] = OrderedDict(
+            {
+                "kind": scalar.nsITelemetry_kind,
+                "keyed": scalar.keyed,
+                "keys": scalar.keys,
+                "record_on_release": scalar.dataset_short == "opt-out",
+                # We don't expire dynamic-builtin scalars: they're only meant for
+                # use in local developer builds anyway. They will expire when rebuilding.
+                "expired": False,
+                "stores": scalar.record_into_store,
+                "expires": scalar.expires,
+                "products": scalar.products,
+            }
+        )
+
+    json.dump(scalar_definitions, output)
+
+
+def main(output, *filenames):
+    # Load the scalars first.
+    scalars = parse_scalar_definitions(filenames)
+
+    # Write the scalar data file.
+    print(banner, file=output)
+    print(file_header, file=output)
+    write_scalar_tables(scalars, output)
+    print(file_footer, file=output)
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_scalar_enum.py b/toolkit/components/telemetry/build_scripts/gen_scalar_enum.py
new file mode 100644
index 0000000000..321cd047d7
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_scalar_enum.py
@@ -0,0 +1,60 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out a C++ enum definition whose members are the names of
+# scalar types.
+#
+# The scalars are defined in files provided as command-line arguments.
+
+import sys
+
+import buildconfig
+from mozparsers import parse_scalars
+from mozparsers.shared_telemetry_utils import ParserError
+
+banner = """/* This file is auto-generated, see gen_scalar_enum.py.  */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryScalarEnums_h
+#define mozilla_TelemetryScalarEnums_h
+namespace mozilla {
+namespace Telemetry {
+enum class ScalarID : uint32_t {\
+"""
+
+file_footer = """\
+};
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryScalarEnums_h
+"""
+
+
+def main(output, *filenames):
+    # Load the scalars first.
+    scalars = []
+    for filename in filenames:
+        try:
+            batch = parse_scalars.load_scalars(filename)
+            scalars.extend(batch)
+        except ParserError as ex:
+            print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+            sys.exit(1)
+
+    # Write the enum file.
+    print(banner, file=output)
+    print(file_header, file=output)
+
+    for s in scalars:
+        if s.record_on_os(buildconfig.substs["OS_TARGET"]):
+            print("  %s," % s.enum_label, file=output)
+
+    print("  ScalarCount,", file=output)
+
+    print(file_footer, file=output)
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_userinteraction_data.py b/toolkit/components/telemetry/build_scripts/gen_userinteraction_data.py
new file mode 100644
index 0000000000..b12cbde239
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_userinteraction_data.py
@@ -0,0 +1,105 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Write out UserInteraction information for C++. The UserInteractions are
+# defined in a file provided as a command-line argument.
+
+import sys
+from os import path
+
+from mozparsers import parse_user_interactions
+from mozparsers.shared_telemetry_utils import ParserError, static_assert
+
+COMPONENTS_PATH = path.abspath(
+    path.join(path.dirname(__file__), path.pardir, path.pardir)
+)
+sys.path.append(
+    path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext")
+)
+import sys
+
+from string_table import StringTable
+
+# The banner/text at the top of the generated file.
+banner = """/* This file is auto-generated, only for internal use in
+   TelemetryUserInteraction.h, see gen_userinteraction_data.py. */
+"""
+
+file_header = """\
+#ifndef mozilla_TelemetryUserInteractionData_h
+#define mozilla_TelemetryUserInteractionData_h
+#include "core/UserInteractionInfo.h"
+"""
+
+file_footer = """\
+#endif // mozilla_TelemetryUserInteractionData_h
+"""
+
+
+def write_user_interaction_table(user_interactions, output, string_table):
+    head = """
+      namespace mozilla {
+      namespace Telemetry {
+      namespace UserInteractionID {
+        const static uint32_t UserInteractionCount = %d;
+      }  // namespace UserInteractionID
+      }  // namespace Telemetry
+      }  // namespace mozilla
+    """
+
+    print(head % len(user_interactions), file=output)
+
+    print("namespace {", file=output)
+
+    table_name = "gUserInteractions"
+    print("constexpr UserInteractionInfo %s[] = {" % table_name, file=output)
+
+    for u in user_interactions:
+        name_index = string_table.stringIndex(u.label)
+        print("  UserInteractionInfo({}),".format(name_index), file=output)
+    print("};", file=output)
+
+    static_assert(
+        output,
+        "sizeof(%s) <= UINT32_MAX" % table_name,
+        "index overflow of UserInteractionInfo table %s" % table_name,
+    )
+
+    print("}  // namespace", file=output)
+
+
+def main(output, *filenames):
+    """Load the UserInteraction data and write the generated C++ data file."""
+    user_interactions = []
+    for filename in filenames:
+        try:
+            batch = parse_user_interactions.load_user_interactions(filename)
+            user_interactions.extend(batch)
+        except ParserError as ex:
+            print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
+            sys.exit(1)
+
+    # Write the UserInteraction data file.
+    print(banner, file=output)
+    print(file_header, file=output)
+
+    string_table = StringTable()
+
+    # Write the data for individual UserInteractions.
+    write_user_interaction_table(user_interactions, output, string_table)
+    print("", file=output)
+
+    # Write the string table.
+    string_table_name = "gUserInteractionsStringTable"
+    string_table.writeDefinition(output, string_table_name)
+    static_assert(
+        output, "sizeof(%s) <= UINT32_MAX" % string_table_name, "index overflow"
+    )
+    print("", file=output)
+
+    print(file_footer, file=output)
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/gen_userinteraction_phf.py b/toolkit/components/telemetry/build_scripts/gen_userinteraction_phf.py
new file mode 100644
index 0000000000..f1c7256414
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/gen_userinteraction_phf.py
@@ -0,0 +1,70 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from mozparsers.shared_telemetry_utils import ParserError
+from perfecthash import PerfectHash
+
+PHFSIZE = 1024
+
+import sys
+
+from mozparsers import parse_user_interactions
+
+banner = """/* This file is auto-generated, see gen_userinteraction_phf.py.  */
+"""
+
+header = """
+#ifndef mozilla_TelemetryUserInteractionNameMap_h
+#define mozilla_TelemetryUserInteractionNameMap_h
+
+#include "mozilla/PerfectHash.h"
+
+namespace mozilla {
+namespace Telemetry {
+"""
+
+footer = """
+} // namespace Telemetry
+} // namespace mozilla
+#endif // mozilla_TelemetryUserInteractionNameMap_h
+"""
+
+
+def main(output, *filenames):
+    """
+    Generate a Perfect Hash Table for the UserInteraction name -> UserInteraction ID lookup.
+    The table is immutable once generated and we can avoid any dynamic memory allocation.
+    Parse errors are printed to stderr, not into the generated output, then exit(1).
+    """
+    output.write(banner)
+    output.write(header)
+
+    try:
+        user_interactions = list(parse_user_interactions.from_files(filenames))
+    except ParserError as ex:
+        print("\nError processing UserInteractions:\n%s\n" % str(ex), file=sys.stderr)
+        sys.exit(1)
+
+    user_interactions = [
+        (bytearray(ui.label, "ascii"), idx)
+        for (idx, ui) in enumerate(user_interactions)
+    ]
+    name_phf = PerfectHash(user_interactions, PHFSIZE)
+
+    output.write(
+        name_phf.cxx_codegen(
+            name="UserInteractionIDByNameLookup",
+            entry_type="uint32_t",
+            lower_entry=lambda x: str(x[1]),
+            key_type="const nsACString&",
+            key_bytes="aKey.BeginReading()",
+            key_length="aKey.Length()",
+        )
+    )
+
+    output.write(footer)
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/__init__.py b/toolkit/components/telemetry/build_scripts/mozparsers/__init__.py
new file mode 100644
index 0000000000..c580d191c1
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/__init__.py
@@ -0,0 +1,3 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/parse_events.py b/toolkit/components/telemetry/build_scripts/mozparsers/parse_events.py
new file mode 100644
index 0000000000..09ed651917
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/parse_events.py
@@ -0,0 +1,477 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import atexit
+import itertools
+import re
+import string
+
+import yaml
+
+from . import shared_telemetry_utils as utils
+from .shared_telemetry_utils import ParserError
+
+atexit.register(ParserError.exit_func)
+
+MAX_CATEGORY_NAME_LENGTH = 30
+MAX_METHOD_NAME_LENGTH = 20
+MAX_OBJECT_NAME_LENGTH = 20
+MAX_EXTRA_KEYS_COUNT = 10
+MAX_EXTRA_KEY_NAME_LENGTH = 15
+
+IDENTIFIER_PATTERN = r"^[a-zA-Z][a-zA-Z0-9_.]*[a-zA-Z0-9]$"
+
+
+def nice_type_name(t):
+    if issubclass(t, str):
+        return "string"
+    return t.__name__
+
+
+def convert_to_cpp_identifier(s, sep):
+    return string.capwords(s, sep).replace(sep, "")
+
+
+class OneOf:
+    """This is a placeholder type for the TypeChecker below.
+    It signals that the checked value should match one of the following arguments
+    passed to the TypeChecker constructor.
+    """
+
+    pass
+
+
+class AtomicTypeChecker:
+    """Validate a simple value against a given type"""
+
+    def __init__(self, instance_type):
+        self.instance_type = instance_type
+
+    def check(self, identifier, key, value):
+        if not isinstance(value, self.instance_type):
+            ParserError(
+                "%s: Failed type check for %s - expected %s, got %s."
+                % (
+                    identifier,
+                    key,
+                    nice_type_name(self.instance_type),
+                    nice_type_name(type(value)),
+                )
+            ).handle_later()
+
+
+class MultiTypeChecker:
+    """Validate a simple value against a list of possible types"""
+
+    def __init__(self, *instance_types):
+        if not instance_types:
+            raise Exception("At least one instance type is required.")
+        self.instance_types = instance_types
+
+    def check(self, identifier, key, value):
+        if not any(isinstance(value, i) for i in self.instance_types):
+            ParserError(
+                "%s: Failed type check for %s - got %s, expected one of:\n%s"
+                % (
+                    identifier,
+                    key,
+                    nice_type_name(type(value)),
+                    " or ".join(map(nice_type_name, self.instance_types)),
+                )
+            ).handle_later()
+
+
+class ListTypeChecker:
+    """Validate a list of values against a given type"""
+
+    def __init__(self, instance_type):
+        self.instance_type = instance_type
+
+    def check(self, identifier, key, value):
+        if len(value) < 1:
+            ParserError(
+                "%s: Failed check for %s - list should not be empty."
+                % (identifier, key)
+            ).handle_now()
+
+        for x in value:
+            if not isinstance(x, self.instance_type):
+                ParserError(
+                    "%s: Failed type check for %s - expected list value type %s, got"
+                    " %s."
+                    % (
+                        identifier,
+                        key,
+                        nice_type_name(self.instance_type),
+                        nice_type_name(type(x)),
+                    )
+                ).handle_later()
+
+
+class DictTypeChecker:
+    """Validate keys and values of a dict against a given type"""
+
+    def __init__(self, keys_instance_type, values_instance_type):
+        self.keys_instance_type = keys_instance_type
+        self.values_instance_type = values_instance_type
+
+    def check(self, identifier, key, value):
+        if len(value.keys()) < 1:
+            ParserError(
+                "%s: Failed check for %s - dict should not be empty."
+                % (identifier, key)
+            ).handle_now()
+        for x in value.keys():
+            if not isinstance(x, self.keys_instance_type):
+                ParserError(
+                    "%s: Failed dict type check for %s - expected key type %s, got "
+                    "%s."
+                    % (
+                        identifier,
+                        key,
+                        nice_type_name(self.keys_instance_type),
+                        nice_type_name(type(x)),
+                    )
+                ).handle_later()
+        for k, v in value.items():
+            if not isinstance(v, self.values_instance_type):
+                ParserError(
+                    "%s: Failed dict type check for %s - "
+                    "expected value type %s for key %s, got %s."
+                    % (
+                        identifier,
+                        key,
+                        nice_type_name(self.values_instance_type),
+                        k,
+                        nice_type_name(type(v)),
+                    )
+                ).handle_later()
+
+
+def type_check_event_fields(identifier, name, definition):
+    """Check the event definition's fields for presence, unknown names and types."""
+    REQUIRED_FIELDS = {
+        "objects": ListTypeChecker(str),
+        "bug_numbers": ListTypeChecker(int),
+        "notification_emails": ListTypeChecker(str),
+        "record_in_processes": ListTypeChecker(str),
+        "description": AtomicTypeChecker(str),
+        "products": ListTypeChecker(str),
+    }
+    OPTIONAL_FIELDS = {
+        "methods": ListTypeChecker(str),
+        "release_channel_collection": AtomicTypeChecker(str),
+        "expiry_version": AtomicTypeChecker(str),
+        "extra_keys": DictTypeChecker(str, str),
+        "operating_systems": ListTypeChecker(str),
+    }
+    ALL_FIELDS = dict(REQUIRED_FIELDS, **OPTIONAL_FIELDS)
+
+    # Check that all the required fields are available.
+    missing_fields = [f for f in REQUIRED_FIELDS.keys() if f not in definition]
+    if len(missing_fields) > 0:
+        ParserError(
+            identifier + ": Missing required fields: " + ", ".join(missing_fields)
+        ).handle_now()
+
+    # Is there any unknown field?
+    unknown_fields = [f for f in definition.keys() if f not in ALL_FIELDS]
+    if len(unknown_fields) > 0:
+        ParserError(
+            identifier + ": Unknown fields: " + ", ".join(unknown_fields)
+        ).handle_later()
+
+    # Type-check known fields; unknown ones were reported above and have no checker.
+    for k, v in definition.items():
+        if k in ALL_FIELDS:
+            ALL_FIELDS[k].check(identifier, k, v)
+
+
+def string_check(identifier, field, value, min_length=1, max_length=None, regex=None):
+    # Length check.
+    if len(value) < min_length:
+        ParserError(
+            "%s: Value '%s' for field %s is less than minimum length of %d."
+            % (identifier, value, field, min_length)
+        ).handle_later()
+    if max_length and len(value) > max_length:
+        ParserError(
+            "%s: Value '%s' for field %s is greater than maximum length of %d."
+            % (identifier, value, field, max_length)
+        ).handle_later()
+    # Regex check.
+    if regex and not re.match(regex, value):
+        ParserError(
+            '%s: String value "%s" for %s is not matching pattern "%s".'
+            % (identifier, value, field, regex)
+        ).handle_later()
+
+
+class EventData:
+    """A class representing one event."""
+
+    def __init__(self, category, name, definition, strict_type_checks=False):
+        self._category = category
+        self._name = name
+        self._definition = definition
+        self._strict_type_checks = strict_type_checks
+
+        type_check_event_fields(self.identifier, name, definition)
+
+        # Check method & object string patterns.
+        if strict_type_checks:
+            for method in self.methods:
+                string_check(
+                    self.identifier,
+                    field="methods",
+                    value=method,
+                    min_length=1,
+                    max_length=MAX_METHOD_NAME_LENGTH,
+                    regex=IDENTIFIER_PATTERN,
+                )
+            for obj in self.objects:
+                string_check(
+                    self.identifier,
+                    field="objects",
+                    value=obj,
+                    min_length=1,
+                    max_length=MAX_OBJECT_NAME_LENGTH,
+                    regex=IDENTIFIER_PATTERN,
+                )
+
+        # Check release_channel_collection
+        rcc_key = "release_channel_collection"
+        rcc = definition.get(rcc_key, "opt-in")
+        allowed_rcc = ["opt-in", "opt-out"]
+        if rcc not in allowed_rcc:
+            ParserError(
+                "%s: Value for %s should be one of: %s"
+                % (self.identifier, rcc_key, ", ".join(allowed_rcc))
+            ).handle_later()
+
+        # Check record_in_processes.
+        record_in_processes = definition.get("record_in_processes")
+        for proc in record_in_processes:
+            if not utils.is_valid_process_name(proc):
+                ParserError(
+                    self.identifier + ": Unknown value in record_in_processes: " + proc
+                ).handle_later()
+
+        # Check products.
+        products = definition.get("products")
+        for product in products:
+            if not utils.is_valid_product(product) and self._strict_type_checks:
+                ParserError(
+                    self.identifier + ": Unknown value in products: " + product
+                ).handle_later()
+            if utils.is_geckoview_streaming_product(product):
+                ParserError(
+                    "{}: Product `{}` unsupported for Event Telemetry".format(
+                        self.identifier, product
+                    )
+                ).handle_later()
+
+        # Check operating_systems.
+        operating_systems = definition.get("operating_systems", [])
+        for operating_system in operating_systems:
+            if not utils.is_valid_os(operating_system):
+                ParserError(
+                    self.identifier
+                    + ": Unknown value in operating_systems: "
+                    + operating_system
+                ).handle_later()
+
+        # Check extra_keys.
+        extra_keys = definition.get("extra_keys", {})
+        if len(extra_keys.keys()) > MAX_EXTRA_KEYS_COUNT:
+            ParserError(
+                "%s: Number of extra_keys exceeds limit %d."
+                % (self.identifier, MAX_EXTRA_KEYS_COUNT)
+            ).handle_later()
+        for key in extra_keys.keys():
+            string_check(
+                self.identifier,
+                field="extra_keys",
+                value=key,
+                min_length=1,
+                max_length=MAX_EXTRA_KEY_NAME_LENGTH,
+                regex=IDENTIFIER_PATTERN,
+            )
+
+        # Check expiry.
+        if "expiry_version" not in definition:
+            ParserError(
+                "%s: event is missing required field expiry_version" % (self.identifier)
+            ).handle_later()
+
+        # Finish setup.
+        # Historical versions of Events.yaml may contain expiration versions
+        # using the deprecated format 'N.Na1'. Those scripts set
+        # self._strict_type_checks to false.
+        expiry_version = definition.get("expiry_version", "never")
+        if (
+            not utils.validate_expiration_version(expiry_version)
+            and self._strict_type_checks
+        ):
+            ParserError(
+                "{}: invalid expiry_version: {}.".format(
+                    self.identifier, expiry_version
+                )
+            ).handle_now()
+        definition["expiry_version"] = utils.add_expiration_postfix(expiry_version)
+
+    @property
+    def category(self):
+        return self._category
+
+    @property
+    def category_cpp(self):
+        # Transform e.g. category.example into CategoryExample.
+        return convert_to_cpp_identifier(self._category, ".")
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def identifier(self):
+        return self.category + "#" + self.name
+
+    @property
+    def methods(self):
+        return self._definition.get("methods", [self.name])
+
+    @property
+    def objects(self):
+        return self._definition.get("objects")
+
+    @property
+    def record_in_processes(self):
+        return self._definition.get("record_in_processes")
+
+    @property
+    def record_in_processes_enum(self):
+        """Get the non-empty list of flags representing the processes to record data in"""
+        return [utils.process_name_to_enum(p) for p in self.record_in_processes]
+
+    @property
+    def products(self):
+        """Get the non-empty list of products to record data on"""
+        return self._definition.get("products")
+
+    @property
+    def products_enum(self):
+        """Get the non-empty list of flags representing products to record data on"""
+        return [utils.product_name_to_enum(p) for p in self.products]
+
+    @property
+    def expiry_version(self):
+        return self._definition.get("expiry_version")
+
+    @property
+    def operating_systems(self):
+        """Get the list of operating systems to record data on"""
+        return self._definition.get("operating_systems", ["all"])
+
+    def record_on_os(self, target_os):
+        """Check if this probe should be recorded on the passed os."""
+        os = self.operating_systems
+        if "all" in os:
+            return True
+
+        canonical_os = utils.canonical_os(target_os)
+
+        if "unix" in os and canonical_os in utils.UNIX_LIKE_OS:
+            return True
+
+        return canonical_os in os
+
+    @property
+    def enum_labels(self):
+        """One "<method>_<object>" label per (method, object) pair, method-major."""
+        return [
+            convert_to_cpp_identifier(method_name, "_")
+            + "_"
+            + convert_to_cpp_identifier(object_name, "_")
+            for method_name, object_name in itertools.product(self.methods, self.objects)
+        ]
+
+    @property
+    def dataset(self):
+        """Get the nsITelemetry constant equivalent for release_channel_collection."""
+        if self.dataset_short == "opt-out":
+            return "nsITelemetry::DATASET_ALL_CHANNELS"
+        # Every other policy value maps to the prerelease-only dataset.
+        return "nsITelemetry::DATASET_PRERELEASE_CHANNELS"
+
+    @property
+    def dataset_short(self):
+        """Get the short name of the chosen release channel collection policy for the event."""
+        # The collection policy is optional, but we still define a default
+        # behaviour for it.
+        return self._definition.get("release_channel_collection", "opt-in")
+
+    @property
+    def extra_keys(self):
+        return list(sorted(self._definition.get("extra_keys", {}).keys()))
+
+
+def load_events(filename, strict_type_checks):
+    """Parses a YAML file containing the event definitions.
+
+    :param filename: the YAML file containing the event definitions.
+    :param strict_type_checks: whether to use the stricter type checks.
+    :raises ParserError: if the event file cannot be opened or parsed.
+    """
+
+    # Parse the YAML file; an empty file loads as None and is treated as no events.
+    events = None
+    try:
+        with open(filename, "r") as f:
+            events = yaml.safe_load(f)
+    except IOError as e:
+        ParserError("Error opening " + filename + ": " + str(e) + ".").handle_now()
+    except yaml.YAMLError as e:
+        ParserError(
+            "Error parsing events in " + filename + ": " + str(e) + "."
+        ).handle_now()
+
+    event_list = []
+
+    # Events are defined in a fixed two-level hierarchy within the definition file.
+    # The first level contains the category (group name), while the second level contains
+    # the event names and definitions, e.g.:
+    #   category.name:
+    #     event_name:
+    #       <event definition>
+    #      ...
+    #   ...
+    for category_name, category in sorted((events or {}).items()):
+        string_check(
+            "top level structure",
+            field="category",
+            value=category_name,
+            min_length=1,
+            max_length=MAX_CATEGORY_NAME_LENGTH,
+            regex=IDENTIFIER_PATTERN,
+        )
+
+        # Make sure that the category has at least one entry in it.
+        if not category:
+            ParserError(
+                "Category " + category_name + " must contain at least one entry."
+            ).handle_now()
+
+        for name, entry in sorted(category.items()):
+            string_check(
+                category_name,
+                field="event name",
+                value=name,
+                min_length=1,
+                max_length=MAX_METHOD_NAME_LENGTH,
+                regex=IDENTIFIER_PATTERN,
+            )
+            event_list.append(EventData(category_name, name, entry, strict_type_checks))
+
+    return event_list
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py b/toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py
new file mode 100644
index 0000000000..626188bf06
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py
@@ -0,0 +1,836 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import atexit
+import collections
+import itertools
+import json
+import math
+import os
+import re
+from collections import OrderedDict
+from ctypes import c_int
+
+from . import shared_telemetry_utils as utils
+from .shared_telemetry_utils import ParserError
+
+atexit.register(ParserError.exit_func)
+
+# Constants.
+MAX_LABEL_LENGTH = 20
+MAX_LABEL_COUNT = 100
+MAX_KEY_COUNT = 30
+MAX_KEY_LENGTH = 20
+MIN_CATEGORICAL_BUCKET_COUNT = 50
+CPP_IDENTIFIER_PATTERN = "^[a-z][a-z0-9_]+[a-z0-9]$"
+
+ALWAYS_ALLOWED_KEYS = [
+    "kind",
+    "description",
+    "operating_systems",
+    "expires_in_version",
+    "alert_emails",
+    "keyed",
+    "releaseChannelCollection",
+    "bug_numbers",
+    "keys",
+    "record_in_processes",
+    "record_into_store",
+    "products",
+]
+
+BASE_DOC_URL = (
+    "https://firefox-source-docs.mozilla.org/toolkit/components/" "telemetry/telemetry/"
+)
+HISTOGRAMS_DOC_URL = BASE_DOC_URL + "collection/histograms.html"
+SCALARS_DOC_URL = BASE_DOC_URL + "collection/scalars.html"
+
+GECKOVIEW_STREAMING_SUPPORTED_KINDS = [
+    "linear",
+    "exponential",
+    "categorical",
+]
+
+
+def linear_buckets(dmin, dmax, n_buckets):
+    """Return n_buckets lower bounds: 0, then evenly spaced from dmin to dmax."""
+    bounds = [0] * n_buckets
+    low, high, span = float(dmin), float(dmax), n_buckets - 2
+    for idx in range(1, n_buckets):
+        weighted = low * (n_buckets - 1 - idx) + high * (idx - 1)
+        bounds[idx] = int(weighted / span + 0.5)
+    return bounds
+
+
+def exponential_buckets(dmin, dmax, n_buckets):
+    """Return bucket lower bounds: 0, dmin, then log-spaced (min step 1) up to dmax."""
+    log_max = math.log(dmax)
+    ret_array = [0] * n_buckets
+    current = dmin
+    ret_array[1] = current
+    for bucket_index in range(2, n_buckets):
+        log_current = math.log(current)
+        log_ratio = (log_max - log_current) / (n_buckets - bucket_index)
+        log_next = log_current + log_ratio
+        next_value = int(math.floor(math.exp(log_next) + 0.5))
+        if next_value > current:
+            current = next_value
+        else:
+            current = current + 1
+        ret_array[bucket_index] = current
+    return ret_array
+
+
+allowlists = None
+
+
+def load_allowlist():
+    """Load histogram-allowlists.json into the global `allowlists` (name -> set)."""
+    global allowlists
+    parsers_path = os.path.realpath(os.path.dirname(__file__))
+    # The parsers live in build_scripts/mozparsers in the Telemetry module, while
+    # the histogram-allowlists file lives in the root of the module. Account
+    # for that when looking for the allowlist.
+    # NOTE: if the parsers are moved, this logic will need to be updated.
+    telemetry_module_path = os.path.abspath(
+        os.path.join(parsers_path, os.pardir, os.pardir)
+    )
+    # Resolved outside the try block so both error messages can always name it.
+    allowlist_path = os.path.join(telemetry_module_path, "histogram-allowlists.json")
+    try:
+        with open(allowlist_path, "r") as f:
+            try:
+                allowlists = json.load(f)
+                for name, allowlist in allowlists.items():
+                    allowlists[name] = set(allowlist)
+            except ValueError:
+                ParserError("Error parsing allowlist: %s" % allowlist_path).handle_now()
+    except IOError:
+        allowlists = None
+        ParserError("Unable to open allowlist: %s." % allowlist_path).handle_now()
+
+
+class Histogram:
+    """A class for representing a histogram definition."""
+
+    def __init__(self, name, definition, strict_type_checks=False):
+        """Initialize a histogram named name with the given definition.
+        definition is a dict-like object that must contain at least the keys:
+
+         - 'kind': The kind of histogram.  Must be one of 'boolean', 'flag',
+           'count', 'enumerated', 'categorical', 'linear', or 'exponential'.
+         - 'description': A textual description of the histogram.
+
+        strict_type_checks selects the new, stricter type checks. The server-side
+        still has to deal with old, oddly typed submissions, so it is off by default.
+        """
+        self._strict_type_checks = strict_type_checks
+        self.verify_attributes(name, definition)
+        self._name = name
+        self._description = definition["description"]
+        self._kind = definition["kind"]
+        self._keys = definition.get("keys", [])
+        self._keyed = definition.get("keyed", False)
+        self._expiration = definition.get("expires_in_version")
+        self._labels = definition.get("labels", [])
+        self._record_in_processes = definition.get("record_in_processes")
+        self._record_into_store = definition.get("record_into_store", ["main"])
+        self._products = definition.get("products")
+        self._operating_systems = definition.get("operating_systems", ["all"])
+
+        self.compute_bucket_parameters(definition)
+        self.set_nsITelemetry_kind()
+        self.set_dataset(definition)
+
+    def name(self):
+        """Return the name of the histogram."""
+        return self._name
+
+    def description(self):
+        """Return the description of the histogram."""
+        return self._description
+
+    def kind(self):
+        """Return the kind of the histogram.
+        Will be one of 'boolean', 'flag', 'count', 'enumerated', 'categorical', 'linear',
+        or 'exponential'."""
+        return self._kind
+
+    def expiration(self):
+        """Return the expiration version of the histogram."""
+        return self._expiration
+
+    def nsITelemetry_kind(self):
+        """Return the nsITelemetry constant corresponding to the kind of
+        the histogram."""
+        return self._nsITelemetry_kind
+
+    def low(self):
+        """Return the lower bound of the histogram."""
+        return self._low
+
+    def high(self):
+        """Return the high bound of the histogram."""
+        return self._high
+
+    def n_buckets(self):
+        """Return the number of buckets in the histogram."""
+        return self._n_buckets
+
+    def keyed(self):
+        """Returns True if this is a keyed histogram, False otherwise."""
+        return self._keyed
+
+    def keys(self):
+        """Returns a list of allowed keys for keyed histogram, [] for others."""
+        return self._keys
+
+    def dataset(self):
+        """Returns the dataset this histogram belongs to."""
+        return self._dataset
+
+    def labels(self):
+        """Returns a list of labels for a categorical histogram, [] for others."""
+        return self._labels
+
+    def record_in_processes(self):
+        """Returns a list of processes this histogram is permitted to record in."""
+        return self._record_in_processes
+
+    def record_in_processes_enum(self):
+        """Get the non-empty list of flags representing the processes to record data in"""
+        return [utils.process_name_to_enum(p) for p in self.record_in_processes()]
+
+    def products(self):
+        """Get the non-empty list of products to record data on"""
+        return self._products
+
+    def products_enum(self):
+        """Get the non-empty list of flags representing products to record data on"""
+        return [utils.product_name_to_enum(p) for p in self.products()]
+
+    def operating_systems(self):
+        """Get the list of operating systems to record data on"""
+        return self._operating_systems
+
+    def record_on_os(self, target_os):
+        """Check if this probe should be recorded on the passed os."""
+        os = self.operating_systems()
+        if "all" in os:
+            return True
+
+        canonical_os = utils.canonical_os(target_os)
+
+        if "unix" in os and canonical_os in utils.UNIX_LIKE_OS:
+            return True
+
+        return canonical_os in os
+
+    def record_into_store(self):
+        """Get the non-empty list of stores to record into"""
+        return self._record_into_store
+
+    def ranges(self):
+        """Return an array of lower bounds for each bucket in the histogram."""
+        bucket_fns = {
+            "boolean": linear_buckets,
+            "flag": linear_buckets,
+            "count": linear_buckets,
+            "enumerated": linear_buckets,
+            "categorical": linear_buckets,
+            "linear": linear_buckets,
+            "exponential": exponential_buckets,
+        }
+
+        if self._kind not in bucket_fns:
+            ParserError(
+                'Unknown kind "%s" for histogram "%s".' % (self._kind, self._name)
+            ).handle_later()
+
+        fn = bucket_fns[self._kind]
+        return fn(self.low(), self.high(), self.n_buckets())
+
+    def compute_bucket_parameters(self, definition):
+        bucket_fns = {
+            "boolean": Histogram.boolean_flag_bucket_parameters,
+            "flag": Histogram.boolean_flag_bucket_parameters,
+            "count": Histogram.boolean_flag_bucket_parameters,
+            "enumerated": Histogram.enumerated_bucket_parameters,
+            "categorical": Histogram.categorical_bucket_parameters,
+            "linear": Histogram.linear_bucket_parameters,
+            "exponential": Histogram.exponential_bucket_parameters,
+        }
+
+        if self._kind not in bucket_fns:
+            ParserError(
+                'Unknown kind "%s" for histogram "%s".' % (self._kind, self._name)
+            ).handle_later()
+
+        fn = bucket_fns[self._kind]
+        self.set_bucket_parameters(*fn(definition))
+
+    def verify_attributes(self, name, definition):
+        """Run all validation checks; unknown kinds fall back to the common key set."""
+        general_keys = ALWAYS_ALLOWED_KEYS + ["low", "high", "n_buckets"]
+        table = {
+            "boolean": ALWAYS_ALLOWED_KEYS,
+            "flag": ALWAYS_ALLOWED_KEYS,
+            "count": ALWAYS_ALLOWED_KEYS,
+            "enumerated": ALWAYS_ALLOWED_KEYS + ["n_values"],
+            "categorical": ALWAYS_ALLOWED_KEYS + ["labels", "n_values"],
+            "linear": general_keys,
+            "exponential": general_keys,
+        }
+        # We removed extended_statistics_ok on the client, but the server-side,
+        # where _strict_type_checks==False, has to deal with historical data.
+        if not self._strict_type_checks:
+            table["exponential"] = general_keys + ["extended_statistics_ok"]
+
+        kind = definition.get("kind")
+        if kind not in table:
+            ParserError(
+                'Unknown kind "%s" for histogram "%s".' % (kind, name)
+            ).handle_later()
+        allowed_keys = table.get(kind, ALWAYS_ALLOWED_KEYS)
+
+        self.check_name(name)
+        self.check_keys(name, definition, allowed_keys)
+        self.check_keys_field(name, definition)
+        self.check_field_types(name, definition)
+        self.check_allowlisted_kind(name, definition)
+        self.check_allowlistable_fields(name, definition)
+        self.check_expiration(name, definition)
+        self.check_label_values(name, definition)
+        self.check_record_in_processes(name, definition)
+        self.check_products(name, definition)
+        self.check_operating_systems(name, definition)
+        self.check_record_into_store(name, definition)
+
+    def check_name(self, name):
+        if "#" in name:
+            ParserError(
+                'Error for histogram name "%s": "#" is not allowed.' % (name)
+            ).handle_later()
+
+        # Avoid C++ identifier conflicts between histogram enums and label enum names.
+        if name.startswith("LABELS_"):
+            ParserError(
+                'Error for histogram name "%s":  can not start with "LABELS_".' % (name)
+            ).handle_later()
+
+        # To make it easier to generate C++ identifiers from this etc., we restrict
+        # the histogram names to a strict pattern.
+        # We skip this on the server to avoid failures with old Histogram.json revisions.
+        if self._strict_type_checks:
+            if not re.match(CPP_IDENTIFIER_PATTERN, name, re.IGNORECASE):
+                ParserError(
+                    'Error for histogram name "%s": name does not conform to "%s"'
+                    % (name, CPP_IDENTIFIER_PATTERN)
+                ).handle_later()
+
+    def check_expiration(self, name, definition):
+        field = "expires_in_version"
+        expiration = definition.get(field)
+
+        if not expiration:
+            return
+
+        # We forbid new probes from using "expires_in_version" : "default" field/value pair.
+        # Old ones that use this are added to the allowlist.
+        if (
+            expiration == "default"
+            and allowlists is not None
+            and name not in allowlists["expiry_default"]
+        ):
+            ParserError(
+                'New histogram "%s" cannot have "default" %s value.' % (name, field)
+            ).handle_later()
+
+        # Historical editions of Histograms.json can have the deprecated
+        # expiration format 'N.Na1'. Fortunately, those scripts set
+        # self._strict_type_checks to false.
+        if (
+            expiration != "default"
+            and not utils.validate_expiration_version(expiration)
+            and self._strict_type_checks
+        ):
+            ParserError(
+                (
+                    "Error for histogram {} - invalid {}: {}."
+                    "\nSee: {}#expires-in-version"
+                ).format(name, field, expiration, HISTOGRAMS_DOC_URL)
+            ).handle_later()
+
+        expiration = utils.add_expiration_postfix(expiration)
+
+        definition[field] = expiration
+
+    def check_label_values(self, name, definition):
+        """Report labels that are too long, too many, or not identifier-like."""
+        labels = definition.get("labels")
+        if not labels:
+            return
+        # Build real lists: a lazy filter() would be exhausted by the emptiness test.
+        invalid = [v for v in labels if len(v) > MAX_LABEL_LENGTH]
+        if invalid:
+            ParserError(
+                'Label values for "%s" exceed length limit of %d: %s'
+                % (name, MAX_LABEL_LENGTH, ", ".join(invalid))
+            ).handle_later()
+        if len(labels) > MAX_LABEL_COUNT:
+            ParserError(
+                'Label count for "%s" exceeds limit of %d' % (name, MAX_LABEL_COUNT)
+            ).handle_now()
+
+        # To make it easier to generate C++ identifiers from this etc., we restrict
+        # the label values to a strict pattern.
+        invalid = [
+            v for v in labels if not re.match(CPP_IDENTIFIER_PATTERN, v, re.IGNORECASE)
+        ]
+        if invalid:
+            ParserError(
+                'Label values for %s are not matching pattern "%s": %s'
+                % (name, CPP_IDENTIFIER_PATTERN, ", ".join(invalid))
+            ).handle_later()
+
+    def check_record_in_processes(self, name, definition):
+        """In strict mode, require record_in_processes and validate each entry."""
+        if not self._strict_type_checks:
+            return
+
+        field = "record_in_processes"
+        rip = definition.get(field)
+        DOC_URL = HISTOGRAMS_DOC_URL + "#record-in-processes"
+
+        if not rip:
+            ParserError(
+                'Histogram "%s" must have a "%s" field:\n%s' % (name, field, DOC_URL)
+            ).handle_later()
+        # rip may still be None here if the error above did not abort; iterate nothing.
+        for process in rip or []:
+            if not utils.is_valid_process_name(process):
+                ParserError(
+                    'Histogram "%s" has unknown process "%s" in %s.\n%s'
+                    % (name, process, field, DOC_URL)
+                ).handle_later()
+
+    def check_products(self, name, definition):
+        if not self._strict_type_checks:
+            return
+
+        field = "products"
+        products = definition.get(field)
+
+        DOC_URL = HISTOGRAMS_DOC_URL + "#products"
+
+        if not products:
+            ParserError(
+                'Histogram "%s" must have a "%s" field:\n%s' % (name, field, DOC_URL)
+            ).handle_now()
+
+        for product in products:
+            if not utils.is_valid_product(product):
+                ParserError(
+                    'Histogram "%s" has unknown product "%s" in %s.\n%s'
+                    % (name, product, field, DOC_URL)
+                ).handle_later()
+            if utils.is_geckoview_streaming_product(product):
+                kind = definition.get("kind")
+                if kind not in GECKOVIEW_STREAMING_SUPPORTED_KINDS:
+                    ParserError(
+                        (
+                            'Histogram "%s" is of kind "%s" which is unsupported for '
+                            'product "%s".'
+                        )
+                        % (name, kind, product)
+                    ).handle_later()
+                keyed = definition.get("keyed")
+                if keyed:
+                    ParserError(
+                        'Keyed histograms like "%s" are unsupported for product "%s"'
+                        % (name, product)
+                    ).handle_later()
+
+    def check_operating_systems(self, name, definition):
+        if not self._strict_type_checks:
+            return
+
+        field = "operating_systems"
+        operating_systems = definition.get(field)
+
+        DOC_URL = HISTOGRAMS_DOC_URL + "#operating-systems"
+
+        if not operating_systems:
+            # operating_systems is optional
+            return
+
+        for operating_system in operating_systems:
+            if not utils.is_valid_os(operating_system):
+                ParserError(
+                    'Histogram "%s" has unknown operating system "%s" in %s.\n%s'
+                    % (name, operating_system, field, DOC_URL)
+                ).handle_later()
+
+    def check_record_into_store(self, name, definition):
+        if not self._strict_type_checks:
+            return
+
+        field = "record_into_store"
+        DOC_URL = HISTOGRAMS_DOC_URL + "#record-into-store"
+
+        if field not in definition:
+            # record_into_store is optional
+            return
+
+        record_into_store = definition.get(field)
+        # record_into_store should not be empty
+        if not record_into_store:
+            ParserError(
+                'Histogram "%s" has empty list of stores, which is not allowed.\n%s'
+                % (name, DOC_URL)
+            ).handle_later()
+
+    def check_keys_field(self, name, definition):
+        """In strict mode, validate the optional "keys" list of a keyed histogram."""
+        keys = definition.get("keys")
+        if not self._strict_type_checks or keys is None:
+            return
+        if not definition.get("keyed", False):
+            raise ValueError(
+                "'keys' field is not valid for %s; only allowed for keyed histograms."
+                % (name)
+            )
+
+        if len(keys) == 0:
+            raise ValueError("The key list for %s cannot be empty" % (name))
+
+        if len(keys) > MAX_KEY_COUNT:
+            raise ValueError(
+                "Key count for %s exceeds limit of %d" % (name, MAX_KEY_COUNT)
+            )
+        # Build a real list: a lazy filter() would be exhausted by the emptiness test.
+        too_long = [k for k in keys if len(k) > MAX_KEY_LENGTH]
+        if too_long:
+            raise ValueError(
+                '"keys" values for %s are exceeding length "%d": %s'
+                % (name, MAX_KEY_LENGTH, ", ".join(too_long))
+            )
+
+    def check_allowlisted_kind(self, name, definition):
+        # We don't need to run any of these checks on the server.
+        if not self._strict_type_checks or allowlists is None:
+            return
+
+        # Disallow "flag" and "count" histograms on desktop, suggest to use
+        # scalars instead. Allow using these histograms on Android, as we
+        # don't support scalars there yet.
+        hist_kind = definition.get("kind")
+        android_target = "android" in definition.get("operating_systems", [])
+
+        if (
+            not android_target
+            and hist_kind in ["flag", "count"]
+            and name not in allowlists["kind"]
+        ):
+            ParserError(
+                (
+                    'Unsupported kind "%s" for histogram "%s":\n'
+                    'New "%s" histograms are not supported on Desktop, you should'
+                    " use scalars instead:\n"
+                    "%s\n"
+                    "Are you trying to add a histogram on Android?"
+                    ' Add "operating_systems": ["android"] to your histogram definition.'
+                )
+                % (hist_kind, name, hist_kind, SCALARS_DOC_URL)
+            ).handle_now()
+
+    # Check for the presence of fields that old histograms are allowlisted for.
+    def check_allowlistable_fields(self, name, definition):
+        # We don't need to run any of these checks on the server.
+        if not self._strict_type_checks:
+            return
+
+        # In the pipeline we don't have allowlists available.
+        if allowlists is None:
+            return
+
+        for field in ["alert_emails", "bug_numbers"]:
+            if field not in definition and name not in allowlists[field]:
+                ParserError(
+                    'New histogram "%s" must have a "%s" field.' % (name, field)
+                ).handle_later()
+            if field in definition and name in allowlists[field]:
+                msg = (
+                    'Histogram "%s" should be removed from the allowlist for "%s" in '
+                    "histogram-allowlists.json."
+                )
+                ParserError(msg % (name, field)).handle_later()
+
+    def check_field_types(self, name, definition):
+        """Type-check scalar and list fields; coerce legacy values when not strict."""
+        # Define expected types for the histogram properties.
+        type_checked_fields = {
+            "n_buckets": int,
+            "n_values": int,
+            "low": int,
+            "high": int,
+            "keyed": bool,
+            "expires_in_version": str,
+            "kind": str,
+            "description": str,
+            "releaseChannelCollection": str,
+        }
+        # For list fields we check the items types.
+        type_checked_list_fields = {
+            "bug_numbers": int,
+            "alert_emails": str,
+            "labels": str,
+            "record_in_processes": str,
+            "keys": str,
+            "products": str,
+            "operating_systems": str,
+            "record_into_store": str,
+        }
+        # For the server-side, where _strict_type_checks==False, we want to
+        # skip the stricter type checks for these fields for dealing with
+        # historical data.
+        coerce_fields = ["low", "high", "n_values", "n_buckets"]
+        if not self._strict_type_checks:
+            # This handles some old non-numeric expressions.
+            EXPRESSIONS = {
+                "JS::GCReason::NUM_TELEMETRY_REASONS": 101,
+                "mozilla::StartupTimeline::MAX_EVENT_ID": 12,
+            }
+
+            def try_to_coerce_to_number(v):
+                if v in EXPRESSIONS:
+                    return EXPRESSIONS[v]
+                try:
+                    # NOTE(review): eval() runs definition text as code; trusted files only.
+                    return eval(v, {})
+                except Exception:
+                    return v
+
+            for key in [k for k in coerce_fields if k in definition]:
+                definition[key] = try_to_coerce_to_number(definition[key])
+            # This handles old "keyed":"true" definitions (bug 1271986).
+            if definition.get("keyed", None) == "true":
+                definition["keyed"] = True
+
+        def nice_type_name(t):
+            if t is str:
+                return "string"
+            return t.__name__
+
+        for key, key_type in type_checked_fields.items():
+            if key not in definition:
+                continue
+            if not isinstance(definition[key], key_type):
+                ParserError(
+                    'Value for key "{0}" in histogram "{1}" should be {2}.'.format(
+                        key, name, nice_type_name(key_type)
+                    )
+                ).handle_later()
+
+        # Make sure the max range is positive and lower than or equal to INT_MAX.
+        high = definition.get("high")
+        if isinstance(high, int) and (high <= 0 or c_int(high).value != high):
+            ParserError(
+                'Value for high in histogram "{0}" should be lower or equal to'
+                " INT_MAX.".format(name)
+            ).handle_later()
+
+        for key, key_type in type_checked_list_fields.items():
+            if key not in definition:
+                continue
+            if not all(isinstance(x, key_type) for x in definition[key]):
+                ParserError(
+                    'All values for list "{0}" in histogram "{1}" should be of type'
+                    " {2}.".format(key, name, nice_type_name(key_type))
+                ).handle_later()
+
+    def check_keys(self, name, definition, allowed_keys):
+        """In strict mode, report every definition key that is not in allowed_keys."""
+        if self._strict_type_checks:
+            for key in definition:
+                if key not in allowed_keys:
+                    ParserError(
+                        'Key "%s" is not allowed for histogram "%s".' % (key, name)
+                    ).handle_later()
+
+    def set_bucket_parameters(self, low, high, n_buckets):
+        """Store the bucket layout and flag non-allowlisted oversized bucket counts."""
+        self._low, self._high, self._n_buckets = low, high, n_buckets
+        max_n_buckets = 101 if self._kind in ["enumerated", "categorical"] else 100
+        # Type guard comes first: a non-int n_buckets cannot be ordered against an int.
+        if (
+            allowlists is not None
+            and type(self._n_buckets) is int
+            and self._n_buckets > max_n_buckets
+            and self._name not in allowlists["n_buckets"]
+        ):
+            ParserError(
+                'New histogram "%s" is not permitted to have more than 100 buckets.\n'
+                "Histograms with large numbers of buckets use disproportionately high"
+                " amounts of resources. Contact a Telemetry peer (e.g. in #telemetry)"
+                " if you think an exception ought to be made:\n"
+                "https://wiki.mozilla.org/Modules/Toolkit#Telemetry" % self._name
+            ).handle_later()
+
+    @staticmethod
+    def boolean_flag_bucket_parameters(definition):
+        return (1, 2, 3)
+
+    @staticmethod
+    def linear_bucket_parameters(definition):
+        """Return (low, high, n_buckets) read from definition; "low" defaults to 1."""
+        low = definition.get("low", 1)
+        high = definition["high"]
+        n_buckets = definition["n_buckets"]
+        return (low, high, n_buckets)
+
+    @staticmethod
+    def enumerated_bucket_parameters(definition):
+        """Return (1, n_values, n_values + 1) for the definition's "n_values"."""
+        value_count = definition["n_values"]
+        bucket_count = value_count + 1
+        return (1, value_count, bucket_count)
+
+    @staticmethod
+    def categorical_bucket_parameters(definition):
+        """Return (1, n, n + 1), where n is the largest of the label count, the
+        optional "n_values" field and MIN_CATEGORICAL_BUCKET_COUNT.
+        """
+        # Categorical histograms get a minimum bucket count to make working with
+        # them easier. Otherwise when adding labels later we run into problems
+        # with the pipeline not supporting bucket changes.
+        # This can be overridden using the n_values field.
+        candidates = (
+            len(definition["labels"]),
+            definition.get("n_values", 0),
+            MIN_CATEGORICAL_BUCKET_COUNT,
+        )
+        bucket_basis = max(candidates)
+        return (1, bucket_basis, bucket_basis + 1)
+
+    @staticmethod
+    def exponential_bucket_parameters(definition):
+        """Return (low, high, n_buckets) read from definition; "low" defaults to 1."""
+        low = definition.get("low", 1)
+        high = definition["high"]
+        n_buckets = definition["n_buckets"]
+        return (low, high, n_buckets)
+
+    def set_nsITelemetry_kind(self):
+        """Set self._nsITelemetry_kind to the nsITelemetry::HISTOGRAM_* constant
+        that corresponds to self._kind.
+        """
+        # Telemetry implementation type for each histogram kind.
+        implementation_by_kind = {
+            "boolean": "BOOLEAN",
+            "flag": "FLAG",
+            "count": "COUNT",
+            "enumerated": "LINEAR",
+            "categorical": "CATEGORICAL",
+            "linear": "LINEAR",
+            "exponential": "EXPONENTIAL",
+        }
+
+        kind = self._kind
+        if kind not in implementation_by_kind:
+            ParserError(
+                'Unknown kind "%s" for histogram "%s".' % (kind, self._name)
+            ).handle_later()
+
+        # NOTE(review): an unknown kind still reaches this lookup and raises
+        # KeyError after the error above has been recorded.
+        implementation = implementation_by_kind[kind]
+        self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_%s" % implementation
+
+    def set_dataset(self, definition):
+        """Set self._dataset from the definition's "releaseChannelCollection"
+        policy, which defaults to "opt-in" when absent.
+        """
+        policy = definition.get("releaseChannelCollection", "opt-in")
+        dataset_by_policy = {
+            "opt-in": "DATASET_PRERELEASE_CHANNELS",
+            "opt-out": "DATASET_ALL_CHANNELS",
+        }
+
+        if policy not in dataset_by_policy:
+            ParserError(
+                "Unknown value for releaseChannelCollection"
+                ' policy for histogram "%s".' % self._name
+            ).handle_later()
+
+        # NOTE(review): an unknown policy still reaches this lookup and raises
+        # KeyError after the error above has been recorded.
+        dataset_name = dataset_by_policy[policy]
+        self._dataset = "nsITelemetry::" + dataset_name
+
+
+# This hook function loads the key/value pairs of one JSON object into an
+# OrderedDict. With strict_type_checks set, a duplicate key is reported through
+# ParserError(...).handle_later(); the later value replaces the earlier one.
+def load_histograms_into_dict(ordered_pairs, strict_type_checks):
+    result = collections.OrderedDict()
+    for name, payload in ordered_pairs:
+        if strict_type_checks and name in result:
+            message = "Found duplicate key in Histograms file: %s" % name
+            ParserError(message).handle_later()
+        result[name] = payload
+    return result
+
+
+# We support generating histograms from multiple different input files, not
+# just Histograms.json.  For each file's basename, we have a specific
+# routine to parse that file, and return a dictionary mapping histogram
+# names to histogram parameters.
+def from_json(filename, strict_type_checks):
+    """Parse a JSON histogram definition file.
+
+    Every JSON object is built by load_histograms_into_dict, so objects come
+    back as OrderedDicts. A parse failure is reported through
+    ParserError(...).handle_now().
+
+    :param filename: path of the JSON file to read.
+    :param strict_type_checks: forwarded to load_histograms_into_dict.
+    :return: the parsed JSON document.
+    """
+
+    def hook(ps):
+        return load_histograms_into_dict(ps, strict_type_checks)
+
+    # JSON text is UTF-8; be explicit so that parsing does not depend on the
+    # platform's locale encoding.
+    with open(filename, "r", encoding="utf-8") as f:
+        try:
+            histograms = json.load(f, object_pairs_hook=hook)
+        except ValueError as e:
+            ParserError(
+                "error parsing histograms in %s: %s" % (filename, e)
+            ).handle_now()
+    return histograms
+
+
+def to_camel_case(property_name):
+    """Convert a snake_case or kebab-case name to CamelCase.
+
+    Leading/trailing underscores and then hyphens are stripped. Every
+    [a-z0-9] character that starts the name or follows a "_" or "-" is
+    upper-cased, and that separator is dropped.
+    """
+    trimmed = property_name.strip("_").strip("-")
+
+    def upper_first(match):
+        # Group 1 is the separator (or string start); only group 2 is kept.
+        return match.group(2).upper()
+
+    return re.sub("(^|_|-)([a-z0-9])", upper_first, trimmed)
+
+
+def _select_json_parser(basename):
+    """Return from_json for basenames ending in ".json", otherwise None."""
+    return from_json if basename.endswith(".json") else None
+
+
+# Each entry takes a file basename and returns the parser to use for it, or
+# None when the entry does not handle that file.
+FILENAME_PARSERS = [
+    _select_json_parser,
+]
+
+
+def from_files(filenames, strict_type_checks=True):
+    """Yield a Histogram for every histogram defined in filenames.
+
+    This is a generator: no file is read until iteration starts. With
+    strict_type_checks set, load_allowlist() is called first. Duplicate
+    histogram names and orphaned allowlist entries are reported through
+    ParserError(...).handle_later().
+    """
+    if strict_type_checks:
+        load_allowlist()
+
+    merged = OrderedDict()
+    for path in filenames:
+        # The first FILENAME_PARSERS entry that returns a parser wins.
+        parser = None
+        for select in FILENAME_PARSERS:
+            candidate = select(os.path.basename(path))
+            if candidate is not None:
+                parser = candidate
+                break
+
+        if parser is None:
+            ParserError("Don't know how to parse %s." % path).handle_now()
+
+        parsed = parser(path, strict_type_checks)
+
+        # OrderedDicts are important, because then the iteration order over
+        # the parsed histograms is stable, which makes the insertion into
+        # merged stable, which makes ordering in generated files
+        # stable, which makes builds more deterministic.
+        if not isinstance(parsed, OrderedDict):
+            ParserError("Histogram parser did not provide an OrderedDict.").handle_now()
+
+        for histogram_name, histogram_definition in parsed.items():
+            if histogram_name in merged:
+                ParserError(
+                    'Duplicate histogram name "%s".' % histogram_name
+                ).handle_later()
+            merged[histogram_name] = histogram_definition
+
+    # Check that histograms that were removed from Histograms.json etc.
+    # are also removed from the allowlists.
+    if allowlists is not None:
+        allowlisted = set(itertools.chain.from_iterable(allowlists.values()))
+        orphaned = allowlisted - set(merged)
+        if orphaned:
+            msg = (
+                "The following entries are orphaned and should be removed from "
+                "histogram-allowlists.json:\n%s"
+            )
+            ParserError(msg % (", ".join(sorted(orphaned)))).handle_later()
+
+    for histogram_name, histogram_definition in merged.items():
+        yield Histogram(
+            histogram_name, histogram_definition, strict_type_checks=strict_type_checks
+        )
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/parse_scalars.py b/toolkit/components/telemetry/build_scripts/mozparsers/parse_scalars.py
new file mode 100644
index 0000000000..5ec591b393
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/parse_scalars.py
@@ -0,0 +1,503 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import atexit
+import io
+import re
+
+import yaml
+
+from . import shared_telemetry_utils as utils
+from .shared_telemetry_utils import ParserError
+
+atexit.register(ParserError.exit_func)
+
+# Documentation page that the scalar error messages link to.
+BASE_DOC_URL = (
+    "https://firefox-source-docs.mozilla.org/toolkit/components/"
+    "telemetry/telemetry/collection/scalars.html"
+)
+
+# The map containing the allowed scalar types and their mapping to
+# nsITelemetry::SCALAR_TYPE_* type constants.
+SCALAR_TYPES_MAP = dict(
+    uint="nsITelemetry::SCALAR_TYPE_COUNT",
+    string="nsITelemetry::SCALAR_TYPE_STRING",
+    boolean="nsITelemetry::SCALAR_TYPE_BOOLEAN",
+)
+
+
+class ScalarType:
+    """A class for representing a scalar definition.
+
+    Validation problems are reported through ParserError(...).handle_later()
+    rather than raised at the point of detection.
+    """
+
+    def __init__(self, category_name, probe_name, definition, strict_type_checks):
+        # Validate and set the name, so we don't need to pass it to the other
+        # validation functions.
+        self._strict_type_checks = strict_type_checks
+        self.validate_names(category_name, probe_name)
+        self._name = probe_name
+        self._category_name = category_name
+
+        # Validating the scalar definition.
+        self.validate_types(definition)
+        self.validate_values(definition)
+
+        # Everything is ok, set the rest of the data.
+        self._definition = definition
+        self._expires = utils.add_expiration_postfix(definition["expires"])
+
+    def validate_names(self, category_name, probe_name):
+        """Validate the category and probe name:
+            - Category name must be alpha-numeric + '.', no leading/trailing digit or '.'.
+            - Probe name must be alpha-numeric + '_', no leading/trailing digit or '_'.
+
+        This check runs regardless of the strict type checks setting.
+
+        :param category_name: the name of the category the probe is in.
+        :param probe_name: the name of the scalar probe.
+        :raises ParserError: if the length of the names exceeds the limit or they don't
+                conform our name specification.
+        """
+
+        # Enforce a maximum length on category and probe names.
+        MAX_NAME_LENGTH = 40
+        for n in [category_name, probe_name]:
+            if len(n) > MAX_NAME_LENGTH:
+                ParserError(
+                    (
+                        "Name '{}' exceeds maximum name length of {} characters.\n"
+                        "See: {}#the-yaml-definition-file"
+                    ).format(n, MAX_NAME_LENGTH, BASE_DOC_URL)
+                ).handle_later()
+
+        def check_name(name, error_msg_prefix, allowed_char_regexp):
+            # Check if we only have the allowed characters.
+            chars_regxp = r"^[a-zA-Z0-9" + allowed_char_regexp + r"]+$"
+            if not re.search(chars_regxp, name):
+                ParserError(
+                    (
+                        error_msg_prefix + " name must be alpha-numeric. Got: '{}'.\n"
+                        "See: {}#the-yaml-definition-file"
+                    ).format(name, BASE_DOC_URL)
+                ).handle_later()
+
+            # Don't allow leading/trailing digits, '.' or '_'.
+            if re.search(r"(^[\d\._])|([\d\._])$", name):
+                ParserError(
+                    (
+                        error_msg_prefix + " name must not have a leading/trailing "
+                        "digit, a dot or underscore. Got: '{}'.\n"
+                        " See: {}#the-yaml-definition-file"
+                    ).format(name, BASE_DOC_URL)
+                ).handle_later()
+
+        check_name(category_name, "Category", r"\.")
+        check_name(probe_name, "Probe", r"_")
+
+    def validate_types(self, definition):
+        """This function performs some basic sanity checks on the scalar definition:
+            - Checks that all the required fields are available.
+            - Checks that no unknown fields are present.
+            - Checks that all the fields have the expected types.
+
+        Nothing is checked unless strict type checks are enabled.
+
+        :param definition: the dictionary containing the scalar properties.
+        :raises ParserError: if a scalar definition field is of the wrong type.
+        :raises ParserError: if a required field is missing or unknown fields are present.
+        """
+
+        if not self._strict_type_checks:
+            return
+
+        def validate_notification_email(notification_email):
+            # Perform simple email validation to make sure it doesn't contain spaces or commas.
+            return not any(c in notification_email for c in [",", " "])
+
+        # The required and optional fields in a scalar type definition.
+        REQUIRED_FIELDS = {
+            "bug_numbers": list,  # This contains ints. See LIST_FIELDS_CONTENT.
+            "description": str,
+            "expires": str,
+            "kind": str,
+            "notification_emails": list,  # This contains strings. See LIST_FIELDS_CONTENT.
+            "record_in_processes": list,
+            "products": list,
+        }
+
+        OPTIONAL_FIELDS = {
+            "release_channel_collection": str,
+            "keyed": bool,
+            "keys": list,
+            "operating_systems": list,
+            "record_into_store": list,
+        }
+
+        # The types for the data within the fields that hold lists.
+        LIST_FIELDS_CONTENT = {
+            "bug_numbers": int,
+            "notification_emails": str,
+            "record_in_processes": str,
+            "products": str,
+            "keys": str,
+            "operating_systems": str,
+            "record_into_store": str,
+        }
+
+        # Concatenate the required and optional field definitions.
+        ALL_FIELDS = REQUIRED_FIELDS.copy()
+        ALL_FIELDS.update(OPTIONAL_FIELDS)
+
+        # Checks that all the required fields are available.
+        missing_fields = [f for f in REQUIRED_FIELDS.keys() if f not in definition]
+        if len(missing_fields) > 0:
+            ParserError(
+                self._name
+                + " - missing required fields: "
+                + ", ".join(missing_fields)
+                + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+            ).handle_later()
+
+        # Do we have any unknown field?
+        unknown_fields = [f for f in definition.keys() if f not in ALL_FIELDS]
+        if len(unknown_fields) > 0:
+            ParserError(
+                self._name
+                + " - unknown fields: "
+                + ", ".join(unknown_fields)
+                + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+            ).handle_later()
+
+        # Checks the type for all the known fields. Unknown fields were reported
+        # above and have no expected type to look up.
+        wrong_type_names = [
+            "{} must be {}".format(f, str(ALL_FIELDS[f]))
+            for f in definition.keys()
+            if f in ALL_FIELDS and not isinstance(definition[f], ALL_FIELDS[f])
+        ]
+        if len(wrong_type_names) > 0:
+            ParserError(
+                self._name
+                + " - "
+                + ", ".join(wrong_type_names)
+                + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+            ).handle_later()
+
+        # Check that the email addresses don't contain spaces or commas.
+        # A missing or non-list field has already been reported above.
+        notification_emails = definition.get("notification_emails")
+        if isinstance(notification_emails, list):
+            for notification_email in notification_emails:
+                if not isinstance(notification_email, str):
+                    # Reported by the list content check below.
+                    continue
+                if not validate_notification_email(notification_email):
+                    ParserError(
+                        self._name
+                        + " - invalid email address: "
+                        + notification_email
+                        + ".\nSee: {}".format(BASE_DOC_URL)
+                    ).handle_later()
+
+        # Check that the lists are not empty and that data in the lists
+        # have the correct types. Only fields with a declared content type
+        # are inspected; anything else was reported above.
+        list_fields = [
+            f
+            for f in definition
+            if f in LIST_FIELDS_CONTENT and isinstance(definition[f], list)
+        ]
+        for field in list_fields:
+            # Check for empty lists.
+            if len(definition[field]) == 0:
+                ParserError(
+                    (
+                        "Field '{}' for probe '{}' must not be empty"
+                        + ".\nSee: {}#required-fields)"
+                    ).format(field, self._name, BASE_DOC_URL)
+                ).handle_later()
+            # Check the type of the list content.
+            broken_types = [
+                not isinstance(v, LIST_FIELDS_CONTENT[field]) for v in definition[field]
+            ]
+            if any(broken_types):
+                ParserError(
+                    (
+                        "Field '{}' for probe '{}' must only contain values of type {}"
+                        ".\nSee: {}#the-yaml-definition-file)"
+                    ).format(
+                        field,
+                        self._name,
+                        str(LIST_FIELDS_CONTENT[field]),
+                        BASE_DOC_URL,
+                    )
+                ).handle_later()
+
+        # Check that keys are only added to keyed scalars and that their values are valid
+        MAX_KEY_COUNT = 100
+        MAX_KEY_LENGTH = 72
+        keys = definition.get("keys")
+        if isinstance(keys, list):
+            if not definition.get("keyed", False):
+                ParserError(
+                    self._name
+                    + " - invalid field: "
+                    + "\n`keys` field only valid for keyed scalars"
+                ).handle_later()
+
+            if len(keys) > MAX_KEY_COUNT:
+                ParserError(
+                    self._name
+                    + " - exceeding key count: "
+                    + "\n`keys` values count  must not exceed {}".format(MAX_KEY_COUNT)
+                ).handle_later()
+
+            # Non-string keys were reported by the list content check above.
+            invalid = [
+                k for k in keys if isinstance(k, str) and len(k) > MAX_KEY_LENGTH
+            ]
+            if len(invalid) > 0:
+                ParserError(
+                    self._name
+                    + " - invalid key value"
+                    + "\n `keys` values are exceeding length {}:".format(MAX_KEY_LENGTH)
+                    + ", ".join(invalid)
+                ).handle_later()
+
+    def validate_values(self, definition):
+        """This function checks that the fields have the correct values.
+
+        Nothing is checked unless strict type checks are enabled.
+
+        :param definition: the dictionary containing the scalar properties.
+        :raises ParserError: if a scalar definition field contains an unexpected value.
+        """
+
+        if not self._strict_type_checks:
+            return
+
+        # Validate the scalar kind. The message is built with format() so that a
+        # missing (None) or non-string kind is reported instead of raising
+        # TypeError during string concatenation.
+        scalar_kind = definition.get("kind")
+        if not isinstance(scalar_kind, str) or scalar_kind not in SCALAR_TYPES_MAP:
+            ParserError(
+                "{} - unknown scalar kind: {}.\nSee: {}".format(
+                    self._name, scalar_kind, BASE_DOC_URL
+                )
+            ).handle_later()
+
+        # Validate the collection policy.
+        collection_policy = definition.get("release_channel_collection", None)
+        if collection_policy and collection_policy not in ["opt-in", "opt-out"]:
+            ParserError(
+                self._name
+                + " - unknown collection policy: "
+                + collection_policy
+                + ".\nSee: {}#optional-fields".format(BASE_DOC_URL)
+            ).handle_later()
+
+        # Validate operating_systems.
+        operating_systems = definition.get("operating_systems", [])
+        for operating_system in operating_systems:
+            if not utils.is_valid_os(operating_system):
+                ParserError(
+                    self._name
+                    + " - invalid entry in operating_systems: "
+                    + operating_system
+                    + ".\nSee: {}#optional-fields".format(BASE_DOC_URL)
+                ).handle_later()
+
+        # Validate record_in_processes.
+        record_in_processes = definition.get("record_in_processes", [])
+        for proc in record_in_processes:
+            if not utils.is_valid_process_name(proc):
+                ParserError(
+                    self._name
+                    + " - unknown value in record_in_processes: "
+                    + proc
+                    + ".\nSee: {}".format(BASE_DOC_URL)
+                ).handle_later()
+
+        # Validate product.
+        products = definition.get("products", [])
+        for product in products:
+            if not utils.is_valid_product(product):
+                ParserError(
+                    self._name
+                    + " - unknown value in products: "
+                    + product
+                    + ".\nSee: {}".format(BASE_DOC_URL)
+                ).handle_later()
+            if utils.is_geckoview_streaming_product(product):
+                keyed = definition.get("keyed")
+                if keyed:
+                    ParserError(
+                        "%s - keyed Scalars not supported for product %s"
+                        % (self._name, product)
+                    ).handle_later()
+
+        # Validate the expiration version.
+        # Historical versions of Scalars.json may contain expiration versions
+        # using the deprecated format 'N.Na1'. Those scripts set
+        # self._strict_type_checks to false.
+        expires = definition.get("expires")
+        if not utils.validate_expiration_version(expires) and self._strict_type_checks:
+            ParserError(
+                "{} - invalid expires: {}.\nSee: {}#required-fields".format(
+                    self._name, expires, BASE_DOC_URL
+                )
+            ).handle_later()
+
+    @property
+    def category(self):
+        """Get the category name"""
+        return self._category_name
+
+    @property
+    def name(self):
+        """Get the scalar name"""
+        return self._name
+
+    @property
+    def label(self):
+        """Get the scalar label generated from the scalar and category names."""
+        return self._category_name + "." + self._name
+
+    @property
+    def enum_label(self):
+        """Get the enum label generated from the scalar and category names. This is used to
+        generate the enum tables."""
+
+        # The scalar name can contain informations about its hierarchy (e.g. 'a.b.scalar').
+        # We can't have dots in C++ enums, replace them with an underscore. Also, make the
+        # label upper case for consistency with the histogram enums.
+        return self.label.replace(".", "_").upper()
+
+    @property
+    def bug_numbers(self):
+        """Get the list of related bug numbers"""
+        return self._definition["bug_numbers"]
+
+    @property
+    def description(self):
+        """Get the scalar description"""
+        return self._definition["description"]
+
+    @property
+    def expires(self):
+        """Get the scalar expiration"""
+        return self._expires
+
+    @property
+    def kind(self):
+        """Get the scalar kind"""
+        return self._definition["kind"]
+
+    @property
+    def keys(self):
+        """Get the allowed keys for this scalar or [] if there aren't any"""
+        return self._definition.get("keys", [])
+
+    @property
+    def keyed(self):
+        """Boolean indicating whether this is a keyed scalar"""
+        return self._definition.get("keyed", False)
+
+    @property
+    def nsITelemetry_kind(self):
+        """Get the scalar kind constant defined in nsITelemetry"""
+        return SCALAR_TYPES_MAP.get(self.kind)
+
+    @property
+    def notification_emails(self):
+        """Get the list of notification emails"""
+        return self._definition["notification_emails"]
+
+    @property
+    def record_in_processes(self):
+        """Get the non-empty list of processes to record data in"""
+        # Before we added content process support in bug 1278556, we only recorded in the
+        # main process.
+        return self._definition.get("record_in_processes", ["main"])
+
+    @property
+    def record_in_processes_enum(self):
+        """Get the non-empty list of flags representing the processes to record data in"""
+        return [utils.process_name_to_enum(p) for p in self.record_in_processes]
+
+    @property
+    def products(self):
+        """Get the non-empty list of products to record data on"""
+        return self._definition.get("products")
+
+    @property
+    def products_enum(self):
+        """Get the non-empty list of flags representing products to record data on"""
+        return [utils.product_name_to_enum(p) for p in self.products]
+
+    @property
+    def dataset(self):
+        """Get the nsITelemetry constant equivalent to the chosen release channel collection
+        policy for the scalar.
+        """
+        rcc = self.dataset_short
+        table = {
+            "opt-in": "DATASET_PRERELEASE_CHANNELS",
+            "opt-out": "DATASET_ALL_CHANNELS",
+        }
+        return "nsITelemetry::" + table[rcc]
+
+    @property
+    def dataset_short(self):
+        """Get the short name of the chosen release channel collection policy for the scalar."""
+        # The collection policy is optional, but we still define a default
+        # behaviour for it.
+        return self._definition.get("release_channel_collection", "opt-in")
+
+    @property
+    def operating_systems(self):
+        """Get the list of operating systems to record data on"""
+        return self._definition.get("operating_systems", ["all"])
+
+    def record_on_os(self, target_os):
+        """Check if this probe should be recorded on the passed os."""
+        os = self.operating_systems
+        if "all" in os:
+            return True
+
+        canonical_os = utils.canonical_os(target_os)
+
+        if "unix" in os and canonical_os in utils.UNIX_LIKE_OS:
+            return True
+
+        return canonical_os in os
+
+    @property
+    def record_into_store(self):
+        """Get the list of stores this probe should be recorded into"""
+        return self._definition.get("record_into_store", ["main"])
+
+
+def load_scalars(filename, strict_type_checks=True):
+    """Parses a YAML file containing the scalar definition.
+
+    Failures to open or parse the file are reported through
+    ParserError(...).handle_now(); an empty category is reported through
+    ParserError(...).handle_later() and skipped.
+
+    :param filename: the YAML file containing the scalars definition.
+    :param strict_type_checks: forwarded to every ScalarType that is built.
+    :return: a list of ScalarType objects, ordered by category name and then
+            by probe name.
+    """
+
+    # Parse the scalar definitions from the YAML file.
+    # PyYAML reports malformed documents as yaml.YAMLError, which does not
+    # derive from ValueError, so it has to be caught explicitly.
+    scalars = None
+    try:
+        with io.open(filename, "r", encoding="utf-8") as f:
+            scalars = yaml.safe_load(f)
+    except IOError as e:
+        ParserError("Error opening " + filename + ": " + str(e)).handle_now()
+    except (ValueError, yaml.YAMLError) as e:
+        ParserError(
+            "Error parsing scalars in {}: {}"
+            ".\nSee: {}".format(filename, e, BASE_DOC_URL)
+        ).handle_now()
+
+    scalar_list = []
+
+    # Scalars are defined in a fixed two-level hierarchy within the definition file.
+    # The first level contains the category name, while the second level contains the
+    # probe name (e.g. "category.name: probe: ...").
+    for category_name in sorted(scalars):
+        category = scalars[category_name]
+
+        # Make sure that the category has at least one probe in it.
+        if not category:
+            ParserError(
+                'Category "{}" must have at least one probe in it'
+                ".\nSee: {}".format(category_name, BASE_DOC_URL)
+            ).handle_later()
+            # There is nothing to iterate, and sorted(None) would raise.
+            continue
+
+        for probe_name in sorted(category):
+            # We found a scalar type. Go ahead and parse it.
+            scalar_info = category[probe_name]
+            scalar_list.append(
+                ScalarType(category_name, probe_name, scalar_info, strict_type_checks)
+            )
+
+    return scalar_list
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/parse_user_interactions.py b/toolkit/components/telemetry/build_scripts/mozparsers/parse_user_interactions.py
new file mode 100644
index 0000000000..6863d67ec4
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/parse_user_interactions.py
@@ -0,0 +1,256 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import atexit
+import io
+import re
+
+import yaml
+
+from .shared_telemetry_utils import ParserError
+
+atexit.register(ParserError.exit_func)
+
+# Documentation page that the UserInteraction error messages link to.
+BASE_DOC_URL = (
+    "https://firefox-source-docs.mozilla.org/toolkit/components/"
+    "telemetry/telemetry/collection/user_interactions.html"
+)
+
+
+class UserInteractionType:
+    """A class for representing a UserInteraction definition."""
+
+    def __init__(self, category_name, user_interaction_name, definition):
+        # Validate and set the name, so we don't need to pass it to the other
+        # validation functions.
+        self.validate_names(category_name, user_interaction_name)
+        self._name = user_interaction_name
+        self._category_name = category_name
+
+        # Validating the UserInteraction definition.
+        self.validate_types(definition)
+
+        # Everything is ok, set the rest of the data.
+        self._definition = definition
+
+    def validate_names(self, category_name, user_interaction_name):
+        """Validate the category and UserInteraction name:
+            - Category name must be alpha-numeric + '.', no leading/trailing digit or '.'.
+            - UserInteraction name must be alpha-numeric + '_', no leading/trailing digit or '_'.
+
+        :param category_name: the name of the category the UserInteraction is in.
+        :param user_interaction_name: the name of the UserInteraction.
+        :raises ParserError: if the length of the names exceeds the limit or they don't
+                conform our name specification.
+        """
+
+        # Enforce a maximum length on category and UserInteraction names.
+        MAX_NAME_LENGTH = 40
+        for n in [category_name, user_interaction_name]:
+            if len(n) > MAX_NAME_LENGTH:
+                ParserError(
+                    (
+                        "Name '{}' exceeds maximum name length of {} characters.\n"
+                        "See: {}#the-yaml-definition-file"
+                    ).format(n, MAX_NAME_LENGTH, BASE_DOC_URL)
+                ).handle_later()
+
+        def check_name(name, error_msg_prefix, allowed_char_regexp):
+            # Check if we only have the allowed characters.
+            chars_regxp = r"^[a-zA-Z0-9" + allowed_char_regexp + r"]+$"
+            if not re.search(chars_regxp, name):
+                ParserError(
+                    (
+                        error_msg_prefix + " name must be alpha-numeric. Got: '{}'.\n"
+                        "See: {}#the-yaml-definition-file"
+                    ).format(name, BASE_DOC_URL)
+                ).handle_later()
+
+            # Don't allow leading/trailing digits, '.' or '_'.
+            if re.search(r"(^[\d\._])|([\d\._])$", name):
+                ParserError(
+                    (
+                        error_msg_prefix + " name must not have a leading/trailing "
+                        "digit, a dot or underscore. Got: '{}'.\n"
+                        " See: {}#the-yaml-definition-file"
+                    ).format(name, BASE_DOC_URL)
+                ).handle_later()
+
+        check_name(category_name, "Category", r"\.")
+        check_name(user_interaction_name, "UserInteraction", r"_")
+
+    def validate_types(self, definition):
+        """This function performs some basic sanity checks on the UserInteraction
+           definition:
+            - Checks that all the required fields are available.
+            - Checks that all the fields have the expected types.
+
+        :param definition: the dictionary containing the UserInteraction
+               properties.
+        :raises ParserError: if a UserInteraction definition field is of the
+                wrong type.
+        :raises ParserError: if a required field is missing or unknown fields are present.
+        """
+
+        # The required and optional fields in a UserInteraction definition.
+        REQUIRED_FIELDS = {
+            "bug_numbers": list,  # This contains ints. See LIST_FIELDS_CONTENT.
+            "description": str,
+        }
+
+        # The types for the data within the fields that hold lists.
+        LIST_FIELDS_CONTENT = {
+            "bug_numbers": int,
+        }
+
+        ALL_FIELDS = REQUIRED_FIELDS.copy()
+
+        # Checks that all the required fields are available.
+        missing_fields = [f for f in REQUIRED_FIELDS.keys() if f not in definition]
+        if len(missing_fields) > 0:
+            ParserError(
+                self._name
+                + " - missing required fields: "
+                + ", ".join(missing_fields)
+                + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+            ).handle_later()
+
+        # Do we have any unknown field?
+        unknown_fields = [f for f in definition.keys() if f not in ALL_FIELDS]
+        if len(unknown_fields) > 0:
+            ParserError(
+                self._name
+                + " - unknown fields: "
+                + ", ".join(unknown_fields)
+                + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+            ).handle_later()
+
+        # Checks the type for all the fields.
+        wrong_type_names = [
+            "{} must be {}".format(f, str(ALL_FIELDS[f]))
+            for f in definition.keys()
+            if not isinstance(definition[f], ALL_FIELDS[f])
+        ]
+        if len(wrong_type_names) > 0:
+            ParserError(
+                self._name
+                + " - "
+                + ", ".join(wrong_type_names)
+                + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
+            ).handle_later()
+
+        # Check that the lists are not empty and that data in the lists
+        # have the correct types.
+        list_fields = [f for f in definition if isinstance(definition[f], list)]
+        for field in list_fields:
+            # Check for empty lists.
+            if len(definition[field]) == 0:
+                ParserError(
+                    (
+                        "Field '{}' for probe '{}' must not be empty"
+                        + ".\nSee: {}#required-fields)"
+                    ).format(field, self._name, BASE_DOC_URL)
+                ).handle_later()
+            # Check the type of the list content.
+            broken_types = [
+                not isinstance(v, LIST_FIELDS_CONTENT[field]) for v in definition[field]
+            ]
+            if any(broken_types):
+                ParserError(
+                    (
+                        "Field '{}' for probe '{}' must only contain values of type {}"
+                        ".\nSee: {}#the-yaml-definition-file)"
+                    ).format(
+                        field,
+                        self._name,
+                        str(LIST_FIELDS_CONTENT[field]),
+                        BASE_DOC_URL,
+                    )
+                ).handle_later()
+
+    @property
+    def category(self):
+        """Return the category name of this UserInteraction."""
+        return self._category_name
+
+    @property
+    def name(self):
+        """Return the UserInteraction name (without the category name)."""
+        return self._name
+
+    @property
+    def label(self):
+        """Get the UserInteraction label generated from the UserInteraction
+        and category names.
+        """
+        return self._category_name + "." + self._name
+
+    @property
+    def bug_numbers(self):
+        """Return the "bug_numbers" entry of the definition."""
+        return self._definition["bug_numbers"]
+
+    @property
+    def description(self):
+        """Return the "description" entry of the definition."""
+        return self._definition["description"]
+
+
+def load_user_interactions(filename):
+    """Parses a YAML file containing the UserInteraction definition.
+
+    :param filename: the YAML file containing the UserInteraction definition.
+    :raises ParserError: if the UserInteraction file cannot be opened or
+            parsed.
+    """
+
+    # Parse the UserInteraction definitions from the YAML file.
+    user_interactions = None
+    try:
+        with io.open(filename, "r", encoding="utf-8") as f:
+            user_interactions = yaml.safe_load(f)
+    except IOError as e:
+        ParserError("Error opening " + filename + ": " + str(e)).handle_now()
+    except ValueError as e:
+        ParserError(
+            "Error parsing UserInteractions in {}: {}"
+            ".\nSee: {}".format(filename, e, BASE_DOC_URL)
+        ).handle_now()
+
+    user_interaction_list = []
+
+    # UserInteractions are defined in a fixed two-level hierarchy within the
+    # definition file. The first level contains the category name, while the
+    # second level contains the UserInteraction name
+    # (e.g. "category.name: user.interaction: ...").
+    for category_name in sorted(user_interactions):
+        category = user_interactions[category_name]
+
+        # Make sure that the category has at least one UserInteraction in it.
+        if not category or len(category) == 0:
+            ParserError(
+                'Category "{}" must have at least one UserInteraction in it'
+                ".\nSee: {}".format(category_name, BASE_DOC_URL)
+            ).handle_later()
+
+        for user_interaction_name in sorted(category):
+            # We found a UserInteraction type. Go ahead and parse it.
+            user_interaction_info = category[user_interaction_name]
+            user_interaction_list.append(
+                UserInteractionType(
+                    category_name, user_interaction_name, user_interaction_info
+                )
+            )
+
+    return user_interaction_list
+
+
+def from_files(filenames):
+    all_user_interactions = []
+
+    for filename in filenames:
+        all_user_interactions += load_user_interactions(filename)
+
+    for user_interaction in all_user_interactions:
+        yield user_interaction
diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/shared_telemetry_utils.py b/toolkit/components/telemetry/build_scripts/mozparsers/shared_telemetry_utils.py
new file mode 100644
index 0000000000..4b4cc9f685
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/mozparsers/shared_telemetry_utils.py
@@ -0,0 +1,185 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This file contains utility functions shared by the scalars and the histogram generation
+# scripts.
+
+import os
+import re
+import sys
+
+import yaml
+
+# Flags that determine which process a measurement is allowed to record from,
+# mapped to the name process_name_to_enum() appends to PROCESS_ENUM_PREFIX.
+KNOWN_PROCESS_FLAGS = {
+    "all": "All",
+    "all_children": "AllChildren",
+    "main": "Main",
+    "content": "Content",
+    "gpu": "Gpu",
+    "rdd": "Rdd",
+    "socket": "Socket",
+    "utility": "Utility",
+    # Historical Values
+    "all_childs": "AllChildren",  # Supporting files from before bug 1363725
+}
+
+GECKOVIEW_STREAMING_PRODUCT = "geckoview_streaming"
+
+SUPPORTED_PRODUCTS = {  # Product -> name used by product_name_to_enum().
+    "firefox": "Firefox",
+    "fennec": "Fennec",
+    GECKOVIEW_STREAMING_PRODUCT: "GeckoviewStreaming",
+    "thunderbird": "Thunderbird",
+    # Historical, deprecated values:
+    # 'geckoview': 'Geckoview',
+}
+
+SUPPORTED_OPERATING_SYSTEMS = [  # The names accepted by is_valid_os().
+    "mac",
+    "linux",
+    "windows",
+    "android",
+    "unix",
+    "all",
+]
+
+# mozinfo identifies linux, BSD variants, Solaris and SunOS as unix
+# Solaris and SunOS are identified as "unix" OS.
+UNIX_LIKE_OS = [
+    "unix",
+    "linux",
+    "bsd",
+]
+
+CANONICAL_OPERATING_SYSTEMS = {
+    "darwin": "mac",
+    "linux": "linux",
+    "winnt": "windows",
+    "android": "android",
+    # for simplicity we treat all BSD and Solaris systems as unix
+    "gnu/kfreebsd": "unix",
+    "sunos": "unix",
+    "dragonfly": "unix",
+    "freeunix": "unix",
+    "netunix": "unix",
+    "openunix": "unix",
+}
+
+PROCESS_ENUM_PREFIX = "mozilla::Telemetry::Common::RecordedProcessType::"
+PRODUCT_ENUM_PREFIX = "mozilla::Telemetry::Common::SupportedProduct::"
+
+
+class ParserError(Exception):
+    """Thrown by different probe parsers. Errors are partitioned into
+    'immediately fatal' and 'eventually fatal' so that the parser can print
+    multiple error messages at a time. See bug 1401612 ."""
+
+    eventual_errors = []
+
+    def __init__(self, *args):
+        Exception.__init__(self, *args)
+
+    def handle_later(self):
+        ParserError.eventual_errors.append(self)
+
+    def handle_now(self):
+        ParserError.print_eventuals()
+        print(str(self), file=sys.stderr)
+        sys.stderr.flush()
+        os._exit(1)
+
+    @classmethod
+    def print_eventuals(cls):
+        while cls.eventual_errors:
+            print(str(cls.eventual_errors.pop(0)), file=sys.stderr)
+
+    @classmethod
+    def exit_func(cls):
+        if cls.eventual_errors:
+            cls("Some errors occurred").handle_now()
+
+
+def is_valid_process_name(name):  # True if `name` is a known process flag.
+    return name in KNOWN_PROCESS_FLAGS
+
+
+def process_name_to_enum(name):  # `name` must be a known process flag:
+    return PROCESS_ENUM_PREFIX + KNOWN_PROCESS_FLAGS.get(name)  # None -> TypeError.
+
+
+def is_valid_product(name):  # True if `name` is a key of SUPPORTED_PRODUCTS.
+    return name in SUPPORTED_PRODUCTS
+
+
+def is_geckoview_streaming_product(name):  # Exact match, case-sensitive.
+    return name == GECKOVIEW_STREAMING_PRODUCT
+
+
+def is_valid_os(name):  # True if `name` is in SUPPORTED_OPERATING_SYSTEMS.
+    return name in SUPPORTED_OPERATING_SYSTEMS
+
+
+def canonical_os(os):
+    """Translate possible OS_TARGET names to their canonical value."""
+
+    return CANONICAL_OPERATING_SYSTEMS.get(os.lower()) or "unknown"
+
+
+def product_name_to_enum(product):
+    if not is_valid_product(product):
+        raise ParserError("Invalid product {}".format(product))
+    return PRODUCT_ENUM_PREFIX + SUPPORTED_PRODUCTS.get(product)
+
+
+def static_assert(output, expression, message):
+    """Writes a C++ compile-time assertion expression to a file.
+    :param output: the output stream.
+    :param expression: the expression to check.
+    :param message: the string literal that will appear if the expression evaluates to
+        false.
+    """
+    print('static_assert(%s, "%s");' % (expression, message), file=output)
+
+
+def validate_expiration_version(expiration):
+    """Makes sure the expiration version has the expected format.
+
+    Allowed examples: "10", "20", "60", "never"
+    Disallowed examples: "Never", "asd", "4000000", "60a1", "30.5a1"
+
+    :param expiration: the expiration version string.
+    :return: True if the expiration validates correctly, False otherwise.
+    """
+    if expiration != "never" and not re.match(r"^\d{1,3}$", expiration):
+        return False
+
+    return True
+
+
+def add_expiration_postfix(expiration):
+    """Formats the expiration version and adds a version postfix if needed.
+
+    :param expiration: the expiration version string.
+    :return: the modified expiration string.
+    """
+    if re.match(r"^[1-9][0-9]*$", expiration):
+        return expiration + ".0a1"
+
+    if re.match(r"^[1-9][0-9]*\.0$", expiration):
+        return expiration + "a1"
+
+    return expiration
+
+
+def load_yaml_file(filename):
+    """Load a YAML file from disk, throw a ParserError on failure."""
+    try:
+        with open(filename, "r") as f:
+            return yaml.safe_load(f)
+    except IOError as e:
+        raise ParserError("Error opening " + filename + ": " + str(e))
+    except ValueError as e:
+        raise ParserError("Error parsing processes in {}: {}".format(filename, e))
diff --git a/toolkit/components/telemetry/build_scripts/run_glean_parser.py b/toolkit/components/telemetry/build_scripts/run_glean_parser.py
new file mode 100644
index 0000000000..e71206e9b0
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/run_glean_parser.py
@@ -0,0 +1,17 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import sys
+from pathlib import Path
+
+from glean_parser import lint
+
+
+def main(output, *filenames):  # `output` is not used by this function.
+    if lint.glinter([Path(x) for x in filenames], {"allow_reserved": False}):
+        sys.exit(1)  # The linter returned a truthy result: exit with status 1.
+
+
+if __name__ == "__main__":
+    main(sys.stdout, *sys.argv[1:])
diff --git a/toolkit/components/telemetry/build_scripts/setup.py b/toolkit/components/telemetry/build_scripts/setup.py
new file mode 100644
index 0000000000..bd8967aec5
--- /dev/null
+++ b/toolkit/components/telemetry/build_scripts/setup.py
@@ -0,0 +1,32 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from setuptools import find_packages, setup
+
+VERSION = "1.0.0"
+
+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
+setup(
+    author="Mozilla Telemetry Team",
+    author_email="telemetry-client-dev@mozilla.com",
+    url=(
+        "https://firefox-source-docs.mozilla.org/"
+        "toolkit/components/telemetry/telemetry/collection/index.html"
+    ),
+    name="mozparsers",
+    description="Shared parsers for the Telemetry probe regitries.",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    license="MPL 2.0",
+    packages=find_packages(),
+    version=VERSION,
+    classifiers=[
+        "Topic :: Software Development :: Build Tools",
+        "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
+        "Programming Language :: Python :: 2.7",
+    ],
+    keywords=["mozilla", "telemetry", "parsers"],
+)
-- 
cgit v1.2.3