diff options
Diffstat (limited to 'toolkit/components/telemetry/build_scripts')
20 files changed, 3686 insertions, 0 deletions
diff --git a/toolkit/components/telemetry/build_scripts/README.md b/toolkit/components/telemetry/build_scripts/README.md new file mode 100644 index 0000000000..4823580735 --- /dev/null +++ b/toolkit/components/telemetry/build_scripts/README.md @@ -0,0 +1,5 @@ +# Telemetry Registries Parsers +This package exports the parsers for Mozilla's probe registries. These registry files contain the definitions for the different probes (i.e. [scalars](https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/telemetry/collection/scalars.html), [histograms](https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/telemetry/collection/histograms.html) and [events](https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/telemetry/collection/events.html)) that can be used to collect data. + +# License +Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/toolkit/components/telemetry/build_scripts/gen_event_data.py b/toolkit/components/telemetry/build_scripts/gen_event_data.py new file mode 100644 index 0000000000..2e321cea72 --- /dev/null +++ b/toolkit/components/telemetry/build_scripts/gen_event_data.py @@ -0,0 +1,227 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Write out event information for C++. The events are defined +# in a file provided as a command-line argument.
import itertools
import json
import sys
from collections import OrderedDict
from os import path

from mozparsers import parse_events
from mozparsers.shared_telemetry_utils import ParserError, static_assert

COMPONENTS_PATH = path.abspath(
    path.join(path.dirname(__file__), path.pardir, path.pardir)
)
sys.path.append(
    path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext")
)
from string_table import StringTable

# The banner/text at the top of the generated file.
banner = """/* This file is auto-generated, only for internal use in TelemetryEvent.h,
 see gen_event_data.py. */
"""

file_header = """\
#ifndef mozilla_TelemetryEventData_h
#define mozilla_TelemetryEventData_h
#include "core/EventInfo.h"
#include "nsITelemetry.h"
namespace {
"""

file_footer = """\
} // namespace
#endif // mozilla_TelemetryEventData_h
"""


def _load_events(filenames):
    """Parse every event registry file and return the combined event list.

    On the first file that raises ParserError, the error is printed to
    stderr and the process exits with status 1.
    """
    events = []
    for filename in filenames:
        try:
            batch = parse_events.load_events(filename, True)
            events.extend(batch)
        except ParserError as ex:
            print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
            sys.exit(1)
    return events


def _write_table_opening(output, element_type, table_name):
    """Emit the opening line of a C++ array definition.

    MSVC without clang gets a plain `const` array, every other compiler a
    `constexpr` one.
    """
    print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
    print("const %s %s[] = {" % (element_type, table_name), file=output)
    print("#else", file=output)
    print("constexpr %s %s[] = {" % (element_type, table_name), file=output)
    print("#endif", file=output)


def write_extra_table(events, output, string_table):
    """Write the table of extra-key string indexes for all events.

    :param events: the parsed event definitions.
    :param output: the output stream.
    :param string_table: the string table the extra keys are interned into.
    :return: one `(index, count)` tuple per event, in event order. Events
             without extra keys get index 0 and count 0.
    """
    table_name = "gExtraKeysTable"
    extra_table = []
    extra_count = 0

    _write_table_opening(output, "uint32_t", table_name)

    for e in events:
        extra_index = 0
        extra_keys = e.extra_keys
        if len(extra_keys) > 0:
            # This event's keys start where the previous events' keys ended.
            extra_index = extra_count
            extra_count += len(extra_keys)
            indexes = string_table.stringIndexes(extra_keys)

            print(
                " // %s, [%s], [%s]"
                % (e.category, ", ".join(e.methods), ", ".join(e.objects)),
                file=output,
            )
            print(" // extra_keys: %s" % ", ".join(extra_keys), file=output)
            print(" %s," % ", ".join(map(str, indexes)), file=output)

        extra_table.append((extra_index, len(extra_keys)))

    print("};", file=output)
    static_assert(output, "sizeof(%s) <= UINT32_MAX" % table_name, "index overflow")

    return extra_table


def write_common_event_table(events, output, string_table, extra_table):
    """Write one CommonEventInfo row per event definition.

    :param events: the parsed event definitions.
    :param output: the output stream.
    :param string_table: the string table categories and expiry versions are
                         interned into.
    :param extra_table: the `(index, count)` list returned by
                        write_extra_table; paired with `events` by position.
    """
    table_name = "gCommonEventInfo"

    _write_table_opening(output, "CommonEventInfo", table_name)

    for e, extras in zip(events, extra_table):
        # Write a comment to make the file human-readable.
        print(" // category: %s" % e.category, file=output)
        print(" // methods: [%s]" % ", ".join(e.methods), file=output)
        print(" // objects: [%s]" % ", ".join(e.objects), file=output)

        # Write the common info structure
        print(
            " {%d, %d, %d, %d, %s, %s, %s },"
            % (
                string_table.stringIndex(e.category),
                string_table.stringIndex(e.expiry_version),
                extras[0],  # extra keys index
                extras[1],  # extra keys count
                e.dataset,
                " | ".join(e.record_in_processes_enum),
                " | ".join(e.products_enum),
            ),
            file=output,
        )

    print("};", file=output)
    static_assert(output, "sizeof(%s) <= UINT32_MAX" % table_name, "index overflow")


def write_event_table(events, output, string_table):
    """Write one EventInfo row per (method, object) pair of every event.

    Each row refers back to the event's gCommonEventInfo entry by the
    event's position in `events`.
    """
    table_name = "gEventInfo"

    _write_table_opening(output, "EventInfo", table_name)

    for common_info_index, e in enumerate(events):
        for method_name, object_name in itertools.product(e.methods, e.objects):
            print(
                " // category: %s, method: %s, object: %s"
                % (e.category, method_name, object_name),
                file=output,
            )

            print(
                " {gCommonEventInfo[%d], %d, %d},"
                % (
                    common_info_index,
                    string_table.stringIndex(method_name),
                    string_table.stringIndex(object_name),
                ),
                file=output,
            )

    print("};", file=output)
    static_assert(output, "sizeof(%s) <= UINT32_MAX" % table_name, "index overflow")


def generate_JSON_definitions(output, *filenames):
    """Write the event definitions to a JSON file.

    :param output: the file to write the content to.
    :param filenames: a list of filenames provided by the build system.
           Every file is loaded and the definitions are merged.
    """
    # Load the event data.
    events = _load_events(filenames)

    event_definitions = OrderedDict()
    for event in events:
        category = event.category

        if category not in event_definitions:
            event_definitions[category] = OrderedDict()

        event_definitions[category][event.name] = OrderedDict(
            {
                "methods": event.methods,
                "objects": event.objects,
                "extra_keys": event.extra_keys,
                "record_on_release": event.dataset_short == "opt-out",
                # We don't expire dynamic-builtin events: they're only meant for
                # use in local developer builds anyway. They will expire when rebuilding.
                "expires": event.expiry_version,
                "expired": False,
                "products": event.products,
            }
        )

    json.dump(event_definitions, output, sort_keys=True)


def main(output, *filenames):
    """Write the C++ event data header for the given registry files."""
    # Load the event data.
    events = _load_events(filenames)

    # Write the event data file.
    print(banner, file=output)
    print(file_header, file=output)

    # Write the extra keys table.
    string_table = StringTable()
    extra_table = write_extra_table(events, output, string_table)
    print("", file=output)

    # Write a table with the common event data.
    write_common_event_table(events, output, string_table, extra_table)
    print("", file=output)

    # Write the data for individual events.
    write_event_table(events, output, string_table)
    print("", file=output)

    # Write the string table.
    string_table_name = "gEventsStringTable"
    string_table.writeDefinition(output, string_table_name)
    static_assert(
        output, "sizeof(%s) <= UINT32_MAX" % string_table_name, "index overflow"
    )
    print("", file=output)

    print(file_footer, file=output)


if __name__ == "__main__":
    main(sys.stdout, *sys.argv[1:])

# diff --git a/toolkit/components/telemetry/build_scripts/gen_event_enum.py b/toolkit/components/telemetry/build_scripts/gen_event_enum.py
# new file mode 100644, index 0000000000..9dd418b3dd

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Write out C++ enum definitions that represent the different event types.
#
# The events are defined in files provided as command-line arguments.

import sys

import buildconfig
from mozparsers import parse_events
from mozparsers.shared_telemetry_utils import ParserError

banner = """/* This file is auto-generated, see gen_event_enum.py. */
"""

file_header = """\
#ifndef mozilla_TelemetryEventEnums_h
#define mozilla_TelemetryEventEnums_h

#include <stdint.h>

namespace mozilla {
namespace Telemetry {
enum class EventID : uint32_t {\
"""

file_footer = """\
};
} // namespace mozilla
} // namespace Telemetry
#endif // mozilla_TelemetryEventEnums_h
"""


def main(output, *filenames):
    """Write the C++ EventID enum for the given event registry files.

    On a ParserError the message is printed to stderr and the process exits
    with status 1.
    """
    # Load the events first.
    events = []
    for filename in filenames:
        try:
            batch = parse_events.load_events(filename, True)
            events.extend(batch)
        except ParserError as ex:
            print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr)
            sys.exit(1)

    # Group events by category, remembering the enum value of each event's
    # first label. The running index advances for every event, including
    # ones that the OS filter below leaves out of the output.
    grouped = {}
    index = 0
    for e in events:
        grouped.setdefault(e.category, []).append((index, e))
        index += len(e.enum_labels)

    # Write the enum file.
    print(banner, file=output)
    print(file_header, file=output)

    target_os = buildconfig.substs["OS_TARGET"]
    for category, indexed in sorted(grouped.items()):
        category_cpp = indexed[0][1].category_cpp

        print(" // category: %s" % category, file=output)

        for event_index, e in indexed:
            if e.record_on_os(target_os):
                for offset, label in enumerate(e.enum_labels):
                    print(
                        " %s_%s = %d," % (category_cpp, label, event_index + offset),
                        file=output,
                    )

    print(" // meta", file=output)
    print(" EventCount = %d," % index, file=output)

    print(file_footer, file=output)


if __name__ == "__main__":
    main(sys.stdout, *sys.argv[1:])

# diff --git a/toolkit/components/telemetry/build_scripts/gen_histogram_data.py b/toolkit/components/telemetry/build_scripts/gen_histogram_data.py
# new file mode 100644, index 0000000000..a203dde9f9

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Write out histogram information for C++. The histograms are defined
# in a file provided as a command-line argument.
import sys
from os import path

import buildconfig
from mozparsers import parse_histograms
from mozparsers.shared_telemetry_utils import ParserError, static_assert

COMPONENTS_PATH = path.abspath(
    path.join(path.dirname(__file__), path.pardir, path.pardir)
)
sys.path.append(
    path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext")
)
from string_table import StringTable

banner = """/* This file is auto-generated, see gen_histogram_data.py. */
"""


def print_array_entry(
    output,
    histogram,
    name_index,
    exp_index,
    label_index,
    label_count,
    key_index,
    key_count,
    store_index,
    store_count,
):
    """Write one HistogramInfo initializer row.

    Nothing is written when the histogram is not recorded on the target OS.
    `store_index` is formatted with `%s` because callers pass either an
    integer offset or the literal string "UINT16_MAX".
    """
    if histogram.record_on_os(buildconfig.substs["OS_TARGET"]):
        print(
            " { %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %s, %s, %s, %s, %s, %s },"
            % (
                histogram.low(),
                histogram.high(),
                histogram.n_buckets(),
                name_index,
                exp_index,
                label_count,
                key_count,
                store_count,
                label_index,
                key_index,
                store_index,
                " | ".join(histogram.record_in_processes_enum()),
                "true" if histogram.keyed() else "false",
                histogram.nsITelemetry_kind(),
                histogram.dataset(),
                " | ".join(histogram.products_enum()),
            ),
            file=output,
        )


def _write_index_table(output, table_name, entries):
    """Write a uint32_t table of string-table indexes plus its size assert.

    :param entries: `(histogram name, list of indexes)` pairs; each pair
                    becomes one commented row.
    """
    print("\n#if defined(_MSC_VER) && !defined(__clang__)", file=output)
    print("const uint32_t %s[] = {" % table_name, file=output)
    print("#else", file=output)
    print("constexpr uint32_t %s[] = {" % table_name, file=output)
    print("#endif", file=output)
    for name, indexes in entries:
        print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output)
    print("};", file=output)
    static_assert(output, "sizeof(%s) <= UINT16_MAX" % table_name, "index overflow")


def write_histogram_table(output, histograms):
    """Write gHistogramInfos, the string table and the label/key/store tables.

    Strings are interned per histogram in the order name, expiration,
    labels, keys, stores.
    """
    string_table = StringTable()

    label_table = []
    label_count = 0
    keys_table = []
    keys_count = 0
    store_table = []
    total_store_count = 0

    print("constexpr HistogramInfo gHistogramInfos[] = {", file=output)
    for histogram in histograms:
        name_index = string_table.stringIndex(histogram.name())
        exp_index = string_table.stringIndex(histogram.expiration())

        labels = histogram.labels()
        label_index = 0
        if len(labels) > 0:
            label_index = label_count
            label_table.append((histogram.name(), string_table.stringIndexes(labels)))
            label_count += len(labels)

        keys = histogram.keys()
        key_index = 0
        if len(keys) > 0:
            key_index = keys_count
            keys_table.append((histogram.name(), string_table.stringIndexes(keys)))
            keys_count += len(keys)

        stores = histogram.record_into_store()
        if stores == ["main"]:
            # if count == 1 && offset == UINT16_MAX -> only main store
            store_index = "UINT16_MAX"
        else:
            store_index = total_store_count
            store_table.append((histogram.name(), string_table.stringIndexes(stores)))
            total_store_count += len(stores)

        print_array_entry(
            output,
            histogram,
            name_index,
            exp_index,
            label_index,
            len(labels),
            key_index,
            len(keys),
            store_index,
            len(stores),
        )
    print("};\n", file=output)

    strtab_name = "gHistogramStringTable"
    string_table.writeDefinition(output, strtab_name)
    static_assert(output, "sizeof(%s) <= UINT32_MAX" % strtab_name, "index overflow")

    _write_index_table(output, "gHistogramLabelTable", label_table)
    _write_index_table(output, "gHistogramKeyTable", keys_table)
    _write_index_table(output, "gHistogramStoresTable", store_table)


# Write out static asserts for histogram data. We'd prefer to perform
# these checks in this script itself, but since several histograms
# (generally enumerated histograms) use compile-time constants for
# their upper bounds, we have to let the compiler do the checking.


def static_asserts_for_boolean(output, histogram):
    """Boolean histograms get no static asserts."""
    pass


def static_asserts_for_flag(output, histogram):
    """Flag histograms get no static asserts."""
    pass


def static_asserts_for_count(output, histogram):
    """Count histograms get no static asserts."""
    pass


def static_asserts_for_enumerated(output, histogram):
    """Assert that an enumerated/categorical histogram has more than 2 values."""
    n_values = histogram.high()
    static_assert(
        output, "%s > 2" % n_values, "Not enough values for %s" % histogram.name()
    )


def shared_static_asserts(output, histogram):
    """Write the range asserts shared by linear and exponential histograms."""
    name = histogram.name()
    low = histogram.low()
    high = histogram.high()
    n_buckets = histogram.n_buckets()
    static_assert(output, "%s < %s" % (low, high), "low >= high for %s" % name)
    static_assert(output, "%s > 2" % n_buckets, "Not enough values for %s" % name)
    static_assert(output, "%s >= 1" % low, "Incorrect low value for %s" % name)
    static_assert(
        output,
        "%s > %s" % (high, n_buckets),
        "high must be > number of buckets for %s;"
        " you may want an enumerated histogram" % name,
    )


def static_asserts_for_linear(output, histogram):
    """Write the static asserts for a linear histogram."""
    shared_static_asserts(output, histogram)


def static_asserts_for_exponential(output, histogram):
    """Write the static asserts for an exponential histogram."""
    shared_static_asserts(output, histogram)


def write_histogram_static_asserts(output, histograms):
    """Write the per-kind static asserts for every histogram on the target OS.

    :raises Exception: if a histogram recorded on the target OS has a kind
                       with no entry in the dispatch table.
    """
    print(
        """
// Perform the checks at the beginning of HistogramGet at
// compile time, so that incorrect histogram definitions
// give compile-time errors, not runtime errors.""",
        file=output,
    )

    table = {
        "boolean": static_asserts_for_boolean,
        "flag": static_asserts_for_flag,
        "count": static_asserts_for_count,
        "enumerated": static_asserts_for_enumerated,
        "categorical": static_asserts_for_enumerated,
        "linear": static_asserts_for_linear,
        "exponential": static_asserts_for_exponential,
    }

    target_os = buildconfig.substs["OS_TARGET"]
    for histogram in histograms:
        kind = histogram.kind()
        if not histogram.record_on_os(target_os):
            continue

        if kind not in table:
            raise Exception(
                'Unknown kind "%s" for histogram "%s".' % (kind, histogram.name())
            )
        fn = table[kind]
        fn(output, histogram)


def write_histogram_ranges(output, histograms):
    """Write the bucket lower-bound table and the per-histogram offsets into it.

    Identical range lists are written once and shared by offset.

    :raises Exception: if the final offset does not fit in an int16_t.
    """
    # This generates static data to avoid costly initialization of histograms
    # (especially exponential ones which require log and exp calls) at runtime.
    # The format must exactly match that required in histogram.cc, which is
    # 0, buckets..., INT_MAX. Additionally, the list ends in a 0 to aid asserts
    # that validate that the length of the ranges list is correct.
    print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
    print("const int gHistogramBucketLowerBounds[] = {", file=output)
    print("#else", file=output)
    print("constexpr int gHistogramBucketLowerBounds[] = {", file=output)
    print("#endif", file=output)

    # Print the dummy buckets for expired histograms, and set the offset to match.
    print("0,1,2,INT_MAX,", file=output)
    offset = 4
    ranges_offsets = {}

    for histogram in histograms:
        ranges = tuple(histogram.ranges())
        if ranges not in ranges_offsets:
            ranges_offsets[ranges] = offset
            # Suffix each ranges listing with INT_MAX, to match histogram.cc's
            # expected format.
            offset += len(ranges) + 1
            print(",".join(map(str, ranges)), ",INT_MAX,", file=output)
    print("0};", file=output)

    if offset > 32767:
        raise Exception("Histogram offsets exceeded maximum value for an int16_t.")

    target_os = buildconfig.substs["OS_TARGET"]
    print("#if defined(_MSC_VER) && !defined(__clang__)", file=output)
    print("const int16_t gHistogramBucketLowerBoundIndex[] = {", file=output)
    print("#else", file=output)
    print("constexpr int16_t gHistogramBucketLowerBoundIndex[] = {", file=output)
    print("#endif", file=output)
    for histogram in histograms:
        if histogram.record_on_os(target_os):
            our_offset = ranges_offsets[tuple(histogram.ranges())]
            print("%d," % our_offset, file=output)

    print("};", file=output)


def main(output, *filenames):
    """Write the C++ histogram data header for the given registry files."""
    try:
        histograms = list(parse_histograms.from_files(filenames))
    except ParserError as ex:
        # Report on stderr so the message cannot end up in the generated
        # header when `output` is stdout.
        print("\nError processing histograms:\n" + str(ex) + "\n", file=sys.stderr)
        sys.exit(1)

    print(banner, file=output)
    write_histogram_table(output, histograms)
    write_histogram_ranges(output, histograms)
    write_histogram_static_asserts(output, histograms)


if __name__ == "__main__":
    main(sys.stdout, *sys.argv[1:])

# diff --git a/toolkit/components/telemetry/build_scripts/gen_histogram_enum.py b/toolkit/components/telemetry/build_scripts/gen_histogram_enum.py
# new file mode 100644, index 0000000000..8d83e760c5

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Write out a C++ enum definition whose members are the names of
# histograms as well as the following other members:
#
# - HistogramCount
#
# The histograms are defined in files provided as command-line arguments.
import sys

import buildconfig
from mozparsers import parse_histograms
from mozparsers.shared_telemetry_utils import ParserError

banner = """/* This file is auto-generated, see gen_histogram_enum.py. */
"""

header = """
#ifndef mozilla_TelemetryHistogramEnums_h
#define mozilla_TelemetryHistogramEnums_h

#include <cstdint>
#include <type_traits>

namespace mozilla {
namespace Telemetry {
"""

footer = """
} // namespace mozilla
} // namespace Telemetry
#endif // mozilla_TelemetryHistogramEnums_h"""


def main(output, *filenames):
    """Write the C++ HistogramID enum and the categorical label enums.

    On a ParserError the message is printed to stderr and the process exits
    with status 1.
    """
    # Print header.
    print(banner, file=output)
    print(header, file=output)

    # Load the histograms.
    try:
        all_histograms = list(parse_histograms.from_files(filenames))
    except ParserError as ex:
        # Report on stderr so the message cannot end up in the generated
        # header when `output` is stdout.
        print("\nError processing histograms:\n" + str(ex) + "\n", file=sys.stderr)
        sys.exit(1)

    target_os = buildconfig.substs["OS_TARGET"]

    # Print the histogram enums.
    print("enum HistogramID : uint32_t {", file=output)
    for histogram in all_histograms:
        if histogram.record_on_os(target_os):
            print(" %s," % histogram.name(), file=output)

    print(" HistogramCount,", file=output)

    print("};", file=output)

    # Write categorical label enums: (enum name, labels, histogram name) for
    # every categorical histogram recorded on the target OS.
    enums = [
        ("LABELS_" + h.name(), h.labels(), h.name())
        for h in all_histograms
        if h.kind() == "categorical" and h.record_on_os(target_os)
    ]
    for name, labels, _ in enums:
        print("\nenum class %s : uint32_t {" % name, file=output)
        print(" %s" % ",\n ".join(labels), file=output)
        print("};", file=output)

    print(
        "\ntemplate<class T> struct IsCategoricalLabelEnum : std::false_type {};",
        file=output,
    )
    for name, _, _ in enums:
        print(
            "template<> struct IsCategoricalLabelEnum<%s> : std::true_type {};" % name,
            file=output,
        )

    print("\ntemplate<class T> struct CategoricalLabelId {};", file=output)
    for name, _, histogram_id in enums:
        print(
            "template<> struct CategoricalLabelId<%s> : "
            "std::integral_constant<uint32_t, %s> {};" % (name, histogram_id),
            file=output,
        )

    # Footer.
    print(footer, file=output)


if __name__ == "__main__":
    main(sys.stdout, *sys.argv[1:])

# diff --git a/toolkit/components/telemetry/build_scripts/gen_histogram_phf.py b/toolkit/components/telemetry/build_scripts/gen_histogram_phf.py
# new file mode 100644, index 0000000000..38c7245506

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import sys

import buildconfig
from mozparsers import parse_histograms
from mozparsers.shared_telemetry_utils import ParserError
from perfecthash import PerfectHash

# Size argument passed to PerfectHash when building the name lookup table.
PHFSIZE = 1024

banner = """/* This file is auto-generated, see gen_histogram_phf.py. */
"""

header = """
#ifndef mozilla_TelemetryHistogramNameMap_h
#define mozilla_TelemetryHistogramNameMap_h

#include "mozilla/PerfectHash.h"

namespace mozilla {
namespace Telemetry {
"""

footer = """
} // namespace mozilla
} // namespace Telemetry
#endif // mozilla_TelemetryHistogramNameMap_h
"""


def main(output, *filenames):
    """
    Generate a Perfect Hash Table for the Histogram name -> Histogram ID lookup.
    The table is immutable once generated and we can avoid any dynamic memory allocation.

    On a ParserError the message is printed to stderr and the process exits
    with status 1.
    """

    output.write(banner)
    output.write(header)

    try:
        histograms = list(parse_histograms.from_files(filenames))
        histograms = [
            h for h in histograms if h.record_on_os(buildconfig.substs["OS_TARGET"])
        ]
    except ParserError as ex:
        # Report on stderr so the message cannot end up in the generated
        # header when `output` is stdout.
        print("\nError processing histograms:\n" + str(ex) + "\n", file=sys.stderr)
        sys.exit(1)

    # Key each histogram's ASCII name to its position in the OS-filtered list.
    entries = [
        (bytearray(hist.name(), "ascii"), idx) for (idx, hist) in enumerate(histograms)
    ]
    name_phf = PerfectHash(entries, PHFSIZE)

    output.write(
        name_phf.cxx_codegen(
            name="HistogramIDByNameLookup",
            entry_type="uint32_t",
            lower_entry=lambda x: str(x[1]),
            key_type="const nsACString&",
            key_bytes="aKey.BeginReading()",
            key_length="aKey.Length()",
        )
    )

    output.write(footer)


if __name__ == "__main__":
    main(sys.stdout, *sys.argv[1:])

# diff --git a/toolkit/components/telemetry/build_scripts/gen_process_data.py b/toolkit/components/telemetry/build_scripts/gen_process_data.py
# new file mode 100644, index 0000000000..2a494689ad

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Write out processes data for C++. The processes are defined
# in a file provided as a command-line argument.
import collections
import sys

from mozparsers.shared_telemetry_utils import ParserError, load_yaml_file

# The banner/text at the top of the generated file.
banner = """/* This file is auto-generated from Telemetry build scripts,
 see gen_process_data.py. */
"""

file_header = """\
#ifndef mozilla_TelemetryProcessData_h
#define mozilla_TelemetryProcessData_h

#include "mozilla/TelemetryProcessEnums.h"

namespace mozilla {
namespace Telemetry {
"""

file_footer = """
} // namespace Telemetry
} // namespace mozilla
#endif // mozilla_TelemetryProcessData_h"""


def to_enum_label(name):
    """Turn a snake_case process name into its enum label, e.g. "foo_bar" -> "FooBar"."""
    return name.title().replace("_", "")


def write_processes_data(processes, output):
    """Write the ProcessID -> GeckoProcessType and ProcessID -> name arrays.

    Rows are emitted in sorted process-name order, so a row's position is
    the index written in its comment.
    """

    def p(line):
        print(line, file=output)

    names = sorted(processes)

    p("static GeckoProcessType ProcessIDToGeckoProcessType[%d] = {" % len(processes))
    for i, name in enumerate(names):
        p(
            " /* %d: ProcessID::%s = */ %s,"
            % (i, to_enum_label(name), processes[name]["gecko_enum"])
        )
    p("};")
    p("")
    p("#if defined(_MSC_VER) && !defined(__clang__)")
    p("static const char* const ProcessIDToString[%d] = {" % len(processes))
    p("#else")
    p("static constexpr const char* ProcessIDToString[%d] = {" % len(processes))
    p("#endif")
    for i, name in enumerate(names):
        p(' /* %d: ProcessID::%s = */ "%s",' % (i, to_enum_label(name), name))
    p("};")


def main(output, *filenames):
    """Write the C++ process data header for the single given YAML file.

    :raises Exception: if no file or more than one file is given.
    """
    if not filenames:
        # Fail with a clear message instead of an IndexError below.
        raise Exception("No processes definition file was provided.")
    if len(filenames) > 1:
        raise Exception("We don't support loading from more than one file.")

    try:
        processes = load_yaml_file(filenames[0])

        # Write the process data file.
        print(banner, file=output)
        print(file_header, file=output)
        write_processes_data(processes, output)
        print(file_footer, file=output)
    except ParserError as ex:
        # Report on stderr so the message cannot end up in the generated
        # header when `output` is stdout.
        print("\nError generating processes data:\n" + str(ex) + "\n", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main(sys.stdout, *sys.argv[1:])

# diff --git a/toolkit/components/telemetry/build_scripts/gen_process_enum.py b/toolkit/components/telemetry/build_scripts/gen_process_enum.py
# new file mode 100644, index 0000000000..bfe2d65e43

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Write out processes data for C++. The processes are defined
# in a file provided as a command-line argument.

import collections
import sys

from mozparsers.shared_telemetry_utils import ParserError, load_yaml_file

# The banner/text at the top of the generated file.
banner = """/* This file is auto-generated from Telemetry build scripts,
 see gen_process_enum.py. */
"""

file_header = """\
#ifndef mozilla_TelemetryProcessEnums_h
#define mozilla_TelemetryProcessEnums_h

#include <cstdint>

namespace mozilla {
namespace Telemetry {
"""

file_footer = """
} // namespace Telemetry
} // namespace mozilla
#endif // mozilla_TelemetryProcessEnums_h"""


def to_enum_label(name):
    """Turn a snake_case process name into its enum label, e.g. "foo_bar" -> "FooBar"."""
    return name.title().replace("_", "")


def write_processes_enum(processes, output):
    """Write the ProcessID enum, numbering processes in sorted-name order."""

    def p(line):
        print(line, file=output)

    p("enum class ProcessID : uint32_t {")
    for i, name in enumerate(sorted(processes)):
        p(" %s = %d," % (to_enum_label(name), i))
    p(" Count = %d" % len(processes))
    p("};")


def main(output, *filenames):
    """Write the C++ process enum header for the single given YAML file.

    :raises Exception: if no file or more than one file is given.
    """
    if not filenames:
        # Fail with a clear message instead of an IndexError below.
        raise Exception("No processes definition file was provided.")
    if len(filenames) > 1:
        raise Exception("We don't support loading from more than one file.")

    try:
        processes = load_yaml_file(filenames[0])

        # Write the process enum file.
        print(banner, file=output)
        print(file_header, file=output)
        write_processes_enum(processes, output)
        print(file_footer, file=output)
    except ParserError as ex:
        # Report on stderr so the message cannot end up in the generated
        # header when `output` is stdout.
        print("\nError generating processes enums:\n" + str(ex) + "\n", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main(sys.stdout, *sys.argv[1:])

# diff --git a/toolkit/components/telemetry/build_scripts/gen_scalar_data.py b/toolkit/components/telemetry/build_scripts/gen_scalar_data.py
# new file mode 100644, index 0000000000..6ef1f457b5

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Write out scalar information for C++. The scalars are defined
# in a file provided as a command-line argument.
+ +import json +import sys +from collections import OrderedDict +from os import path + +import buildconfig +from mozparsers import parse_scalars +from mozparsers.shared_telemetry_utils import ParserError, static_assert + +COMPONENTS_PATH = path.abspath( + path.join(path.dirname(__file__), path.pardir, path.pardir) +) +sys.path.append( + path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext") +) +from string_table import StringTable + +# The banner/text at the top of the generated file. +banner = """/* This file is auto-generated, only for internal use in TelemetryScalar.h, + see gen_scalar_data.py. */ +""" + +file_header = """\ +#ifndef mozilla_TelemetryScalarData_h +#define mozilla_TelemetryScalarData_h +#include "core/ScalarInfo.h" +#include "nsITelemetry.h" +namespace { +""" + +file_footer = """\ +} // namespace +#endif // mozilla_TelemetryScalarData_h +""" + + +def write_scalar_info( + scalar, + output, + name_index, + expiration_index, + store_index, + store_count, + key_count, + key_index, +): + """Writes a scalar entry to the output file. + + :param scalar: a ScalarType instance describing the scalar. + :param output: the output stream. + :param name_index: the index of the scalar name in the strings table. + :param expiration_index: the index of the expiration version in the strings table. + """ + if scalar.record_on_os(buildconfig.substs["OS_TARGET"]): + print( + " {{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }},".format( + scalar.nsITelemetry_kind, + name_index, + expiration_index, + scalar.dataset, + " | ".join(scalar.record_in_processes_enum), + "true" if scalar.keyed else "false", + key_count, + key_index, + " | ".join(scalar.products_enum), + store_count, + store_index, + ), + file=output, + ) + + +def write_scalar_tables(scalars, output): + """Writes the scalar and strings tables to an header file. + + :param scalars: a list of ScalarType instances describing the scalars. + :param output: the output stream. 
+ """ + string_table = StringTable() + + store_table = [] + total_store_count = 0 + + keys_table = [] + total_key_count = 0 + + print("const ScalarInfo gScalars[] = {", file=output) + for s in scalars: + # We add both the scalar label and the expiration string to the strings + # table. + name_index = string_table.stringIndex(s.label) + exp_index = string_table.stringIndex(s.expires) + + stores = s.record_into_store + store_index = 0 + if stores == ["main"]: + # if count == 1 && offset == UINT16_MAX -> only main store + store_index = "UINT16_MAX" + else: + store_index = total_store_count + store_table.append((s.label, string_table.stringIndexes(stores))) + total_store_count += len(stores) + + keys = s.keys + key_index = 0 + if len(keys) > 0: + key_index = total_key_count + keys_table.append((s.label, string_table.stringIndexes(keys))) + total_key_count += len(keys) + + # Write the scalar info entry. + write_scalar_info( + s, + output, + name_index, + exp_index, + store_index, + len(stores), + len(keys), + key_index, + ) + print("};", file=output) + + string_table_name = "gScalarsStringTable" + string_table.writeDefinition(output, string_table_name) + static_assert( + output, "sizeof(%s) <= UINT32_MAX" % string_table_name, "index overflow" + ) + + print("\nconstexpr uint32_t gScalarKeysTable[] = {", file=output) + for name, indexes in keys_table: + print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output) + print("};", file=output) + + store_table_name = "gScalarStoresTable" + print("\n#if defined(_MSC_VER) && !defined(__clang__)", file=output) + print("const uint32_t {}[] = {{".format(store_table_name), file=output) + print("#else", file=output) + print("constexpr uint32_t {}[] = {{".format(store_table_name), file=output) + print("#endif", file=output) + for name, indexes in store_table: + print("/* %s */ %s," % (name, ", ".join(map(str, indexes))), file=output) + print("};", file=output) + static_assert( + output, "sizeof(%s) <= UINT16_MAX" % 
def generate_JSON_definitions(output, *filenames):
    """Serialize the parsed scalar definitions as JSON, grouped by category.

    :param output: the file to write the content to.
    :param filenames: a list of filenames provided by the build system.
    """
    definitions_by_category = OrderedDict()

    for scalar in parse_scalar_definitions(filenames):
        # One nested mapping per category, created on first use.
        per_category = definitions_by_category.setdefault(
            scalar.category, OrderedDict()
        )

        entry = OrderedDict()
        entry["kind"] = scalar.nsITelemetry_kind
        entry["keyed"] = scalar.keyed
        entry["keys"] = scalar.keys
        entry["record_on_release"] = scalar.dataset_short == "opt-out"
        # We don't expire dynamic-builtin scalars: they're only meant for
        # use in local developer builds anyway. They will expire when rebuilding.
        entry["expired"] = False
        entry["stores"] = scalar.record_into_store
        entry["expires"] = scalar.expires
        entry["products"] = scalar.products

        per_category[scalar.name] = entry

    json.dump(definitions_by_category, output)
+ print(banner, file=output) + print(file_header, file=output) + write_scalar_tables(scalars, output) + print(file_footer, file=output) + + +if __name__ == "__main__": + main(sys.stdout, *sys.argv[1:]) diff --git a/toolkit/components/telemetry/build_scripts/gen_scalar_enum.py b/toolkit/components/telemetry/build_scripts/gen_scalar_enum.py new file mode 100644 index 0000000000..321cd047d7 --- /dev/null +++ b/toolkit/components/telemetry/build_scripts/gen_scalar_enum.py @@ -0,0 +1,60 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Write out a C++ enum definition whose members are the names of +# scalar types. +# +# The scalars are defined in files provided as command-line arguments. + +import sys + +import buildconfig +from mozparsers import parse_scalars +from mozparsers.shared_telemetry_utils import ParserError + +banner = """/* This file is auto-generated, see gen_scalar_enum.py. */ +""" + +file_header = """\ +#ifndef mozilla_TelemetryScalarEnums_h +#define mozilla_TelemetryScalarEnums_h +namespace mozilla { +namespace Telemetry { +enum class ScalarID : uint32_t {\ +""" + +file_footer = """\ +}; +} // namespace mozilla +} // namespace Telemetry +#endif // mozilla_TelemetryScalarEnums_h +""" + + +def main(output, *filenames): + # Load the scalars first. + scalars = [] + for filename in filenames: + try: + batch = parse_scalars.load_scalars(filename) + scalars.extend(batch) + except ParserError as ex: + print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr) + sys.exit(1) + + # Write the enum file. 
+ print(banner, file=output) + print(file_header, file=output) + + for s in scalars: + if s.record_on_os(buildconfig.substs["OS_TARGET"]): + print(" %s," % s.enum_label, file=output) + + print(" ScalarCount,", file=output) + + print(file_footer, file=output) + + +if __name__ == "__main__": + main(sys.stdout, *sys.argv[1:]) diff --git a/toolkit/components/telemetry/build_scripts/gen_userinteraction_data.py b/toolkit/components/telemetry/build_scripts/gen_userinteraction_data.py new file mode 100644 index 0000000000..b12cbde239 --- /dev/null +++ b/toolkit/components/telemetry/build_scripts/gen_userinteraction_data.py @@ -0,0 +1,105 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Write out UserInteraction information for C++. The UserInteractions are +# defined in a file provided as a command-line argument. + +import sys +from os import path + +from mozparsers import parse_user_interactions +from mozparsers.shared_telemetry_utils import ParserError, static_assert + +COMPONENTS_PATH = path.abspath( + path.join(path.dirname(__file__), path.pardir, path.pardir) +) +sys.path.append( + path.join(COMPONENTS_PATH, "glean", "build_scripts", "glean_parser_ext") +) +import sys + +from string_table import StringTable + +# The banner/text at the top of the generated file. +banner = """/* This file is auto-generated, only for internal use in + TelemetryUserInteraction.h, see gen_userinteraction_data.py. 
*/ +""" + +file_header = """\ +#ifndef mozilla_TelemetryUserInteractionData_h +#define mozilla_TelemetryUserInteractionData_h +#include "core/UserInteractionInfo.h" +""" + +file_footer = """\ +#endif // mozilla_TelemetryUserInteractionData_h +""" + + +def write_user_interaction_table(user_interactions, output, string_table): + head = """ + namespace mozilla { + namespace Telemetry { + namespace UserInteractionID { + const static uint32_t UserInteractionCount = %d; + } // namespace UserInteractionID + } // namespace Telemetry + } // namespace mozilla + """ + + print(head % len(user_interactions), file=output) + + print("namespace {", file=output) + + table_name = "gUserInteractions" + print("constexpr UserInteractionInfo %s[] = {" % table_name, file=output) + + for u in user_interactions: + name_index = string_table.stringIndex(u.label) + print(" UserInteractionInfo({}),".format(name_index), file=output) + print("};", file=output) + + static_assert( + output, + "sizeof(%s) <= UINT32_MAX" % table_name, + "index overflow of UserInteractionInfo table %s" % table_name, + ) + + print("} // namespace", file=output) + + +def main(output, *filenames): + # Load the UserInteraction data. + user_interactions = [] + for filename in filenames: + try: + batch = parse_user_interactions.load_user_interactions(filename) + user_interactions.extend(batch) + except ParserError as ex: + print("\nError processing %s:\n%s\n" % (filename, str(ex)), file=sys.stderr) + sys.exit(1) + + # Write the scalar data file. + print(banner, file=output) + print(file_header, file=output) + + string_table = StringTable() + + # Write the data for individual UserInteractions. + write_user_interaction_table(user_interactions, output, string_table) + print("", file=output) + + # Write the string table. 
def main(output, *filenames):
    """
    Generate a Perfect Hash Table for the UserInteraction name -> UserInteraction ID lookup.
    The table is immutable once generated and we can avoid any dynamic memory allocation.

    :param output: the stream the generated C++ header is written to.
    :param filenames: the UserInteraction definition files to parse.
    """

    output.write(banner)
    output.write(header)

    try:
        user_interactions = list(parse_user_interactions.from_files(filenames))
    except ParserError as ex:
        # Report on stderr: when run as a script, `output` is sys.stdout, so
        # printing the error there would mix it into the generated header.
        print(
            "\nError processing UserInteractions:\n" + str(ex) + "\n",
            file=sys.stderr,
        )
        sys.exit(1)

    # The hash is keyed on the ASCII bytes of each label; the entry value is
    # the position of the UserInteraction in the parsed list.
    user_interactions = [
        (bytearray(ui.label, "ascii"), idx)
        for (idx, ui) in enumerate(user_interactions)
    ]
    name_phf = PerfectHash(user_interactions, PHFSIZE)

    output.write(
        name_phf.cxx_codegen(
            name="UserInteractionIDByNameLookup",
            entry_type="uint32_t",
            lower_entry=lambda x: str(x[1]),
            key_type="const nsACString&",
            key_bytes="aKey.BeginReading()",
            key_length="aKey.Length()",
        )
    )

    output.write(footer)
def nice_type_name(t):
    """Return a readable name for type `t`; str and its subclasses read as "string"."""
    return "string" if issubclass(t, str) else t.__name__
class ListTypeChecker:
    """Validate a non-empty list of values against a given item type."""

    def __init__(self, instance_type):
        self.instance_type = instance_type

    def check(self, identifier, key, value):
        """Check `value` and report any problem through ParserError.

        :param identifier: name of the definition being checked, used in messages.
        :param key: name of the field being checked, used in messages.
        :param value: the field value; expected to be a list of `instance_type`.
        """
        # A string has a length and is iterable too, so without this guard a
        # plain string would be validated character by character (and pass a
        # `str` item check), while a scalar such as an int would crash in len().
        if not isinstance(value, (list, tuple)):
            ParserError(
                "%s: Failed type check for %s - expected list, got %s."
                % (identifier, key, nice_type_name(type(value)))
            ).handle_later()
            return

        if len(value) < 1:
            ParserError(
                "%s: Failed check for %s - list should not be empty."
                % (identifier, key)
            ).handle_now()

        for x in value:
            if not isinstance(x, self.instance_type):
                ParserError(
                    "%s: Failed type check for %s - expected list value type %s, got"
                    " %s."
                    % (
                        identifier,
                        key,
                        nice_type_name(self.instance_type),
                        nice_type_name(type(x)),
                    )
                ).handle_later()
def type_check_event_fields(identifier, name, definition):
    """Perform a type/schema check on the event definition.

    :param identifier: name of the event being checked, used in messages.
    :param name: the event name (not used by the checks themselves).
    :param definition: the mapping of field names to values for the event.
    """
    REQUIRED_FIELDS = {
        "objects": ListTypeChecker(str),
        "bug_numbers": ListTypeChecker(int),
        "notification_emails": ListTypeChecker(str),
        "record_in_processes": ListTypeChecker(str),
        "description": AtomicTypeChecker(str),
        "products": ListTypeChecker(str),
    }
    OPTIONAL_FIELDS = {
        "methods": ListTypeChecker(str),
        "release_channel_collection": AtomicTypeChecker(str),
        "expiry_version": AtomicTypeChecker(str),
        "extra_keys": DictTypeChecker(str, str),
        "operating_systems": ListTypeChecker(str),
    }
    ALL_FIELDS = REQUIRED_FIELDS.copy()
    ALL_FIELDS.update(OPTIONAL_FIELDS)

    # Check that all the required fields are available.
    missing_fields = [f for f in REQUIRED_FIELDS if f not in definition]
    if missing_fields:
        ParserError(
            identifier + ": Missing required fields: " + ", ".join(missing_fields)
        ).handle_now()

    # Is there any unknown field?
    unknown_fields = [f for f in definition if f not in ALL_FIELDS]
    if unknown_fields:
        ParserError(
            identifier + ": Unknown fields: " + ", ".join(unknown_fields)
        ).handle_later()

    # Type-check fields. Unknown fields were reported above and have no
    # checker; skip them so the deferred error is what gets surfaced instead
    # of a KeyError from the lookup.
    for k, v in definition.items():
        checker = ALL_FIELDS.get(k)
        if checker is not None:
            checker.check(identifier, k, v)
+ % (identifier, value, field, min_length) + ).handle_later() + if max_length and len(value) > max_length: + ParserError( + "%s: Value '%s' for field %s is greater than maximum length of %d." + % (identifier, value, field, max_length) + ).handle_later() + # Regex check. + if regex and not re.match(regex, value): + ParserError( + '%s: String value "%s" for %s is not matching pattern "%s".' + % (identifier, value, field, regex) + ).handle_later() + + +class EventData: + """A class representing one event.""" + + def __init__(self, category, name, definition, strict_type_checks=False): + self._category = category + self._name = name + self._definition = definition + self._strict_type_checks = strict_type_checks + + type_check_event_fields(self.identifier, name, definition) + + # Check method & object string patterns. + if strict_type_checks: + for method in self.methods: + string_check( + self.identifier, + field="methods", + value=method, + min_length=1, + max_length=MAX_METHOD_NAME_LENGTH, + regex=IDENTIFIER_PATTERN, + ) + for obj in self.objects: + string_check( + self.identifier, + field="objects", + value=obj, + min_length=1, + max_length=MAX_OBJECT_NAME_LENGTH, + regex=IDENTIFIER_PATTERN, + ) + + # Check release_channel_collection + rcc_key = "release_channel_collection" + rcc = definition.get(rcc_key, "opt-in") + allowed_rcc = ["opt-in", "opt-out"] + if rcc not in allowed_rcc: + ParserError( + "%s: Value for %s should be one of: %s" + % (self.identifier, rcc_key, ", ".join(allowed_rcc)) + ).handle_later() + + # Check record_in_processes. + record_in_processes = definition.get("record_in_processes") + for proc in record_in_processes: + if not utils.is_valid_process_name(proc): + ParserError( + self.identifier + ": Unknown value in record_in_processes: " + proc + ).handle_later() + + # Check products. 
+ products = definition.get("products") + for product in products: + if not utils.is_valid_product(product) and self._strict_type_checks: + ParserError( + self.identifier + ": Unknown value in products: " + product + ).handle_later() + if utils.is_geckoview_streaming_product(product): + ParserError( + "{}: Product `{}` unsupported for Event Telemetry".format( + self.identifier, product + ) + ).handle_later() + + # Check operating_systems. + operating_systems = definition.get("operating_systems", []) + for operating_system in operating_systems: + if not utils.is_valid_os(operating_system): + ParserError( + self.identifier + + ": Unknown value in operating_systems: " + + operating_system + ).handle_later() + + # Check extra_keys. + extra_keys = definition.get("extra_keys", {}) + if len(extra_keys.keys()) > MAX_EXTRA_KEYS_COUNT: + ParserError( + "%s: Number of extra_keys exceeds limit %d." + % (self.identifier, MAX_EXTRA_KEYS_COUNT) + ).handle_later() + for key in extra_keys.keys(): + string_check( + self.identifier, + field="extra_keys", + value=key, + min_length=1, + max_length=MAX_EXTRA_KEY_NAME_LENGTH, + regex=IDENTIFIER_PATTERN, + ) + + # Check expiry. + if "expiry_version" not in definition: + ParserError( + "%s: event is missing required field expiry_version" % (self.identifier) + ).handle_later() + + # Finish setup. + # Historical versions of Events.yaml may contain expiration versions + # using the deprecated format 'N.Na1'. Those scripts set + # self._strict_type_checks to false. + expiry_version = definition.get("expiry_version", "never") + if ( + not utils.validate_expiration_version(expiry_version) + and self._strict_type_checks + ): + ParserError( + "{}: invalid expiry_version: {}.".format( + self.identifier, expiry_version + ) + ).handle_now() + definition["expiry_version"] = utils.add_expiration_postfix(expiry_version) + + @property + def category(self): + return self._category + + @property + def category_cpp(self): + # Transform e.g. 
category.example into CategoryExample. + return convert_to_cpp_identifier(self._category, ".") + + @property + def name(self): + return self._name + + @property + def identifier(self): + return self.category + "#" + self.name + + @property + def methods(self): + return self._definition.get("methods", [self.name]) + + @property + def objects(self): + return self._definition.get("objects") + + @property + def record_in_processes(self): + return self._definition.get("record_in_processes") + + @property + def record_in_processes_enum(self): + """Get the non-empty list of flags representing the processes to record data in""" + return [utils.process_name_to_enum(p) for p in self.record_in_processes] + + @property + def products(self): + """Get the non-empty list of products to record data on""" + return self._definition.get("products") + + @property + def products_enum(self): + """Get the non-empty list of flags representing products to record data on""" + return [utils.product_name_to_enum(p) for p in self.products] + + @property + def expiry_version(self): + return self._definition.get("expiry_version") + + @property + def operating_systems(self): + """Get the list of operating systems to record data on""" + return self._definition.get("operating_systems", ["all"]) + + def record_on_os(self, target_os): + """Check if this probe should be recorded on the passed os.""" + os = self.operating_systems + if "all" in os: + return True + + canonical_os = utils.canonical_os(target_os) + + if "unix" in os and canonical_os in utils.UNIX_LIKE_OS: + return True + + return canonical_os in os + + @property + def enum_labels(self): + def enum(method_name, object_name): + m = convert_to_cpp_identifier(method_name, "_") + o = convert_to_cpp_identifier(object_name, "_") + return m + "_" + o + + combinations = itertools.product(self.methods, self.objects) + return [enum(t[0], t[1]) for t in combinations] + + @property + def dataset(self): + """Get the nsITelemetry constant equivalent for 
def load_events(filename, strict_type_checks):
    """Parses a YAML file containing the event definitions.

    :param filename: the YAML file containing the event definitions.
    :param strict_type_checks: A boolean indicating whether to use the stricter type checks.
    :return: a list of EventData, ordered by category name and then event name.
    :raises ParserError: if the event file cannot be opened or parsed.
    """

    # Parse the event definitions from the YAML file.
    events = None
    try:
        with open(filename, "r") as f:
            events = yaml.safe_load(f)
    except IOError as e:
        ParserError("Error opening " + filename + ": " + str(e) + ".").handle_now()
    except (yaml.YAMLError, ParserError) as e:
        # Malformed YAML is reported by the YAML library through its own
        # exception type, so it has to be caught explicitly here.
        ParserError(
            "Error parsing events in " + filename + ": " + str(e) + "."
        ).handle_now()

    # An empty document loads as None and a scalar document as a plain value;
    # neither has the category -> events structure iterated below.
    if not isinstance(events, dict):
        ParserError(
            "Error parsing events in "
            + filename
            + ": expected a mapping of categories at the top level."
        ).handle_now()
        # NOTE(review): only reached if handle_now() returns - confirm.
        events = {}

    event_list = []

    # Events are defined in a fixed two-level hierarchy within the definition file.
    # The first level contains the category (group name), while the second level contains
    # the event names and definitions, e.g.:
    #   category.name:
    #     event_name:
    #      <event definition>
    #     ...
    #   ...
    for category_name, category in sorted(events.items()):
        string_check(
            "top level structure",
            field="category",
            value=category_name,
            min_length=1,
            max_length=MAX_CATEGORY_NAME_LENGTH,
            regex=IDENTIFIER_PATTERN,
        )

        # Make sure that the category has at least one entry in it.
        if not category:
            ParserError(
                "Category " + category_name + " must contain at least one entry."
            ).handle_now()
            # Nothing to iterate for an empty category.
            continue

        for name, entry in sorted(category.items()):
            string_check(
                category_name,
                field="event name",
                value=name,
                min_length=1,
                max_length=MAX_METHOD_NAME_LENGTH,
                regex=IDENTIFIER_PATTERN,
            )
            event_list.append(EventData(category_name, name, entry, strict_type_checks))

    return event_list
def linear_buckets(dmin, dmax, n_buckets):
    """Return the lower bounds of `n_buckets` linearly spaced buckets.

    Bucket 0 always starts at 0. The remaining bounds are interpolated between
    `dmin` (bucket 1) and `dmax` (last bucket) and rounded to the nearest
    integer.
    """
    bounds = [0] * n_buckets
    low = float(dmin)
    high = float(dmax)

    def lower_bound(index):
        weighted = low * (n_buckets - 1 - index) + high * (index - 1)
        return int(weighted / (n_buckets - 2) + 0.5)

    bounds[1:] = [lower_bound(index) for index in range(1, n_buckets)]
    return bounds
+ # NOTE: if the parsers are moved, this logic will need to be updated. + telemetry_module_path = os.path.abspath( + os.path.join(parsers_path, os.pardir, os.pardir) + ) + allowlist_path = os.path.join( + telemetry_module_path, "histogram-allowlists.json" + ) + with open(allowlist_path, "r") as f: + try: + allowlists = json.load(f) + for name, allowlist in allowlists.items(): + allowlists[name] = set(allowlist) + except ValueError: + ParserError("Error parsing allowlist: %s" % allowlist_path).handle_now() + except IOError: + allowlists = None + ParserError("Unable to parse allowlist: %s." % allowlist_path).handle_now() + + +class Histogram: + """A class for representing a histogram definition.""" + + def __init__(self, name, definition, strict_type_checks=False): + """Initialize a histogram named name with the given definition. + definition is a dict-like object that must contain at least the keys: + + - 'kind': The kind of histogram. Must be one of 'boolean', 'flag', + 'count', 'enumerated', 'linear', or 'exponential'. + - 'description': A textual description of the histogram. + - 'strict_type_checks': A boolean indicating whether to use the new, stricter type checks. + The server-side still has to deal with old, oddly typed + submissions, so we have to skip them there by default. 
+ """ + self._strict_type_checks = strict_type_checks + self.verify_attributes(name, definition) + self._name = name + self._description = definition["description"] + self._kind = definition["kind"] + self._keys = definition.get("keys", []) + self._keyed = definition.get("keyed", False) + self._expiration = definition.get("expires_in_version") + self._labels = definition.get("labels", []) + self._record_in_processes = definition.get("record_in_processes") + self._record_into_store = definition.get("record_into_store", ["main"]) + self._products = definition.get("products") + self._operating_systems = definition.get("operating_systems", ["all"]) + + self.compute_bucket_parameters(definition) + self.set_nsITelemetry_kind() + self.set_dataset(definition) + + def name(self): + """Return the name of the histogram.""" + return self._name + + def description(self): + """Return the description of the histogram.""" + return self._description + + def kind(self): + """Return the kind of the histogram. 
def record_on_os(self, target_os):
    """Tell whether this histogram is recorded on the given operating system.

    :param target_os: the operating system name to test.
    :return: True when the histogram's operating-system list contains "all",
        contains "unix" while the canonical form of target_os is one of
        utils.UNIX_LIKE_OS, or contains the canonical form of target_os itself.
    """
    allowed = self.operating_systems()
    # "all" short-circuits before the target name is canonicalized.
    if "all" in allowed:
        return True

    normalized = utils.canonical_os(target_os)
    matches_unix_family = "unix" in allowed and normalized in utils.UNIX_LIKE_OS
    return matches_unix_family or normalized in allowed
def verify_attributes(self, name, definition):
    """Validate a histogram definition by running every per-field check.

    :param name: the histogram name.
    :param definition: the dictionary of histogram properties; it must contain
        a "kind" entry. Some of the checks called below (check_field_types,
        check_expiration) rewrite entries of this dictionary in place, so the
        order of the calls matters.
    """
    # The concatenation builds a new object, so the append below does not
    # modify ALWAYS_ALLOWED_KEYS itself.
    general_keys = ALWAYS_ALLOWED_KEYS + ["low", "high", "n_buckets"]

    # Keys permitted in a definition, per histogram kind.
    table = {
        "boolean": ALWAYS_ALLOWED_KEYS,
        "flag": ALWAYS_ALLOWED_KEYS,
        "count": ALWAYS_ALLOWED_KEYS,
        "enumerated": ALWAYS_ALLOWED_KEYS + ["n_values"],
        "categorical": ALWAYS_ALLOWED_KEYS + ["labels", "n_values"],
        "linear": general_keys,
        "exponential": general_keys,
    }
    # We removed extended_statistics_ok on the client, but the server-side,
    # where _strict_type_checks==False, has to deal with historical data.
    # Note that "linear" and "exponential" share the general_keys object, so
    # this append extends the allowed keys of both kinds.
    if not self._strict_type_checks:
        table["exponential"].append("extended_statistics_ok")

    kind = definition["kind"]
    if kind not in table:
        ParserError(
            'Unknown kind "%s" for histogram "%s".' % (kind, name)
        ).handle_later()
    # NOTE(review): if handle_later() returns instead of raising, this lookup
    # raises KeyError for an unknown kind -- confirm against ParserError.
    allowed_keys = table[kind]

    self.check_name(name)
    self.check_keys(name, definition, allowed_keys)
    self.check_keys_field(name, definition)
    self.check_field_types(name, definition)
    self.check_allowlisted_kind(name, definition)
    self.check_allowlistable_fields(name, definition)
    self.check_expiration(name, definition)
    self.check_label_values(name, definition)
    self.check_record_in_processes(name, definition)
    self.check_products(name, definition)
    self.check_operating_systems(name, definition)
    self.check_record_into_store(name, definition)
def check_label_values(self, name, definition):
    """Validate the "labels" list of a histogram definition.

    Reports labels longer than MAX_LABEL_LENGTH, a label count above
    MAX_LABEL_COUNT, and labels that do not match CPP_IDENTIFIER_PATTERN
    (case-insensitively). Does nothing when "labels" is missing or empty.

    :param name: the histogram name, used in the error messages.
    :param definition: the dictionary of histogram properties.
    """
    labels = definition.get("labels")
    if not labels:
        return

    # Build real lists: a filter() iterator would be consumed by the
    # emptiness test and leave nothing for the join in the error message.
    too_long = [label for label in labels if len(label) > MAX_LABEL_LENGTH]
    if too_long:
        ParserError(
            'Label values for "%s" exceed length limit of %d: %s'
            % (name, MAX_LABEL_LENGTH, ", ".join(too_long))
        ).handle_later()

    if len(labels) > MAX_LABEL_COUNT:
        ParserError(
            'Label count for "%s" exceeds limit of %d' % (name, MAX_LABEL_COUNT)
        ).handle_now()

    # To make it easier to generate C++ identifiers from this etc., we restrict
    # the label values to a strict pattern.
    malformed = [
        label
        for label in labels
        if not re.match(CPP_IDENTIFIER_PATTERN, label, re.IGNORECASE)
    ]
    if malformed:
        ParserError(
            'Label values for %s are not matching pattern "%s": %s'
            % (name, CPP_IDENTIFIER_PATTERN, ", ".join(malformed))
        ).handle_later()
def check_keys_field(self, name, definition):
    """Validate the optional "keys" list of a keyed histogram.

    Skipped when strict type checks are off or when the definition has no
    "keys" entry.

    :param name: the histogram name, used in the error messages.
    :param definition: the dictionary of histogram properties.
    :raises ValueError: if "keys" is given for a non-keyed histogram, is
        empty, has more than MAX_KEY_COUNT entries, or contains an entry
        longer than MAX_KEY_LENGTH.
    """
    keys = definition.get("keys")
    if not self._strict_type_checks or keys is None:
        return

    if not definition.get("keyed", False):
        raise ValueError(
            "'keys' field is not valid for %s; only allowed for keyed histograms."
            % (name)
        )

    if len(keys) == 0:
        raise ValueError("The key list for %s cannot be empty" % (name))

    if len(keys) > MAX_KEY_COUNT:
        # This limit is about keys, not labels; say so in the message.
        raise ValueError(
            "Key count for %s exceeds limit of %d" % (name, MAX_KEY_COUNT)
        )

    # Build a real list: a filter() iterator would be consumed by the
    # emptiness test and leave nothing for the join in the error message.
    too_long = [key for key in keys if len(key) > MAX_KEY_LENGTH]
    if too_long:
        raise ValueError(
            '"keys" values for %s are exceeding length "%d": %s'
            % (name, MAX_KEY_LENGTH, ", ".join(too_long))
        )
def check_field_types(self, name, definition):
    """Check that the fields of a histogram definition have the expected types.

    When strict type checks are off (server-side, historical data), the
    numeric fields and a string "keyed" value are first coerced in place, so
    `definition` may be modified by this call.

    :param name: the histogram name, used in the error messages.
    :param definition: the dictionary of histogram properties.
    """
    # Define expected types for the histogram properties.
    type_checked_fields = {
        "n_buckets": int,
        "n_values": int,
        "low": int,
        "high": int,
        "keyed": bool,
        "expires_in_version": str,
        "kind": str,
        "description": str,
        "releaseChannelCollection": str,
    }

    # For list fields we check the items types.
    type_checked_list_fields = {
        "bug_numbers": int,
        "alert_emails": str,
        "labels": str,
        "record_in_processes": str,
        "keys": str,
        "products": str,
        "operating_systems": str,
        "record_into_store": str,
    }

    # For the server-side, where _strict_type_checks==False, we want to
    # skip the stricter type checks for these fields for dealing with
    # historical data.
    coerce_fields = ["low", "high", "n_values", "n_buckets"]
    if not self._strict_type_checks:
        # This handles some old non-numeric expressions.
        EXPRESSIONS = {
            "JS::GCReason::NUM_TELEMETRY_REASONS": 101,
            "mozilla::StartupTimeline::MAX_EVENT_ID": 12,
        }

        def try_to_coerce_to_number(v):
            if v in EXPRESSIONS:
                return EXPRESSIONS[v]
            try:
                # SECURITY NOTE(review): eval() runs arbitrary expressions
                # taken from the registry file. Acceptable only while those
                # files are trusted; do not feed untrusted input through here.
                return eval(v, {})
            except Exception:
                return v

        for key in [k for k in coerce_fields if k in definition]:
            definition[key] = try_to_coerce_to_number(definition[key])
        # This handles old "keyed":"true" definitions (bug 1271986).
        if definition.get("keyed", None) == "true":
            definition["keyed"] = True

    def nice_type_name(t):
        if t is str:
            return "string"
        return t.__name__

    for key, key_type in type_checked_fields.items():
        if key not in definition:
            continue
        if not isinstance(definition[key], key_type):
            ParserError(
                'Value for key "{0}" in histogram "{1}" should be {2}.'.format(
                    key, name, nice_type_name(key_type)
                )
            ).handle_later()

    # Make sure the max range is lower than or equal to INT_MAX.
    # c_int() only accepts integers; a non-int "high" has already been
    # reported by the loop above, so it is skipped here rather than letting
    # c_int() raise TypeError.
    high = definition.get("high")
    if isinstance(high, int) and not c_int(high).value > 0:
        ParserError(
            'Value for high in histogram "{0}" should be lower or equal to INT_MAX.'.format(
                name
            )
        ).handle_later()

    for key, key_type in type_checked_list_fields.items():
        if key not in definition:
            continue
        if not all(isinstance(x, key_type) for x in definition[key]):
            ParserError(
                'All values for list "{0}" in histogram "{1}" should be of type'
                " {2}.".format(key, name, nice_type_name(key_type))
            ).handle_later()
def set_nsITelemetry_kind(self):
    """Store the nsITelemetry histogram constant matching this histogram's kind.

    Sets self._nsITelemetry_kind to "nsITelemetry::HISTOGRAM_<SUFFIX>". An
    unknown kind is reported through ParserError before the lookup is made.
    """
    # Suffix of the Telemetry implementation type for each kind; note that
    # "enumerated" histograms are implemented as LINEAR ones.
    suffix_by_kind = {
        "boolean": "BOOLEAN",
        "flag": "FLAG",
        "count": "COUNT",
        "enumerated": "LINEAR",
        "categorical": "CATEGORICAL",
        "linear": "LINEAR",
        "exponential": "EXPONENTIAL",
    }

    kind = self._kind
    if kind not in suffix_by_kind:
        ParserError(
            'Unknown kind "%s" for histogram "%s".' % (kind, self._name)
        ).handle_later()

    self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_" + suffix_by_kind[kind]
def to_camel_case(property_name):
    """Convert a snake_case or kebab-case name to CamelCase.

    Leading/trailing underscores are stripped first, then leading/trailing
    dashes. Every lowercase letter or digit that starts the string or follows
    a single "_" or "-" is upper-cased and that separator is dropped.

    :param property_name: the name to convert.
    :return: the converted name.
    """

    def upper_after_separator(match):
        # Group 1 is the separator (or the start of the string) and is dropped.
        return match.group(2).upper()

    trimmed = property_name.strip("_").strip("-")
    return re.sub("(^|_|-)([a-z0-9])", upper_after_separator, trimmed)
% filename).handle_now() + + histograms = parser(filename, strict_type_checks) + + # OrderedDicts are important, because then the iteration order over + # the parsed histograms is stable, which makes the insertion into + # all_histograms stable, which makes ordering in generated files + # stable, which makes builds more deterministic. + if not isinstance(histograms, OrderedDict): + ParserError("Histogram parser did not provide an OrderedDict.").handle_now() + + for name, definition in histograms.items(): + if name in all_histograms: + ParserError('Duplicate histogram name "%s".' % name).handle_later() + all_histograms[name] = definition + + # Check that histograms that were removed from Histograms.json etc. + # are also removed from the allowlists. + if allowlists is not None: + all_allowlist_entries = itertools.chain.from_iterable(iter(allowlists.values())) + orphaned = set(all_allowlist_entries) - set(all_histograms.keys()) + if len(orphaned) > 0: + msg = ( + "The following entries are orphaned and should be removed from " + "histogram-allowlists.json:\n%s" + ) + ParserError(msg % (", ".join(sorted(orphaned)))).handle_later() + + for name, definition in all_histograms.items(): + yield Histogram(name, definition, strict_type_checks=strict_type_checks) diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/parse_scalars.py b/toolkit/components/telemetry/build_scripts/mozparsers/parse_scalars.py new file mode 100644 index 0000000000..5ec591b393 --- /dev/null +++ b/toolkit/components/telemetry/build_scripts/mozparsers/parse_scalars.py @@ -0,0 +1,503 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import atexit +import io +import re + +import yaml + +from . 
import shared_telemetry_utils as utils +from .shared_telemetry_utils import ParserError + +atexit.register(ParserError.exit_func) + +# The map of containing the allowed scalar types and their mapping to +# nsITelemetry::SCALAR_TYPE_* type constants. + +BASE_DOC_URL = ( + "https://firefox-source-docs.mozilla.org/toolkit/components/" + + "telemetry/telemetry/collection/scalars.html" +) + +SCALAR_TYPES_MAP = { + "uint": "nsITelemetry::SCALAR_TYPE_COUNT", + "string": "nsITelemetry::SCALAR_TYPE_STRING", + "boolean": "nsITelemetry::SCALAR_TYPE_BOOLEAN", +} + + +class ScalarType: + """A class for representing a scalar definition.""" + + def __init__(self, category_name, probe_name, definition, strict_type_checks): + # Validate and set the name, so we don't need to pass it to the other + # validation functions. + self._strict_type_checks = strict_type_checks + self.validate_names(category_name, probe_name) + self._name = probe_name + self._category_name = category_name + + # Validating the scalar definition. + self.validate_types(definition) + self.validate_values(definition) + + # Everything is ok, set the rest of the data. + self._definition = definition + self._expires = utils.add_expiration_postfix(definition["expires"]) + + def validate_names(self, category_name, probe_name): + """Validate the category and probe name: + - Category name must be alpha-numeric + '.', no leading/trailing digit or '.'. + - Probe name must be alpha-numeric + '_', no leading/trailing digit or '_'. + + :param category_name: the name of the category the probe is in. + :param probe_name: the name of the scalar probe. + :raises ParserError: if the length of the names exceeds the limit or they don't + conform our name specification. + """ + + # Enforce a maximum length on category and probe names. 
def validate_types(self, definition):
    """This function performs some basic sanity checks on the scalar definition:
    - Checks that all the required fields are available.
    - Checks that all the fields have the expected types.

    Every problem is reported through ParserError; a malformed definition
    (unknown fields, missing or wrongly typed lists) is reported rather than
    left to fail on a lookup or iteration. Skipped entirely when strict type
    checks are off.

    :param definition: the dictionary containing the scalar properties.
    :raises ParserError: if a scalar definition field is of the wrong type.
    :raises ParserError: if a required field is missing or unknown fields are present.
    """

    if not self._strict_type_checks:
        return

    def validate_notification_email(notification_email):
        # Perform simple email validation to make sure it doesn't contain spaces or commas.
        return not any(c in notification_email for c in [",", " "])

    # The required and optional fields in a scalar type definition.
    REQUIRED_FIELDS = {
        "bug_numbers": list,  # This contains ints. See LIST_FIELDS_CONTENT.
        "description": str,
        "expires": str,
        "kind": str,
        "notification_emails": list,  # This contains strings. See LIST_FIELDS_CONTENT.
        "record_in_processes": list,
        "products": list,
    }

    OPTIONAL_FIELDS = {
        "release_channel_collection": str,
        "keyed": bool,
        "keys": list,
        "operating_systems": list,
        "record_into_store": list,
    }

    # The types for the data within the fields that hold lists.
    LIST_FIELDS_CONTENT = {
        "bug_numbers": int,
        "notification_emails": str,
        "record_in_processes": str,
        "products": str,
        "keys": str,
        "operating_systems": str,
        "record_into_store": str,
    }

    # Concatenate the required and optional field definitions.
    ALL_FIELDS = REQUIRED_FIELDS.copy()
    ALL_FIELDS.update(OPTIONAL_FIELDS)

    # Checks that all the required fields are available.
    missing_fields = [f for f in REQUIRED_FIELDS if f not in definition]
    if missing_fields:
        ParserError(
            self._name
            + " - missing required fields: "
            + ", ".join(missing_fields)
            + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
        ).handle_later()

    # Do we have any unknown field?
    unknown_fields = [f for f in definition if f not in ALL_FIELDS]
    if unknown_fields:
        ParserError(
            self._name
            + " - unknown fields: "
            + ", ".join(unknown_fields)
            + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
        ).handle_later()

    # Checks the type for all the known fields. Unknown fields were reported
    # above and have no expected type to look up.
    wrong_type_names = [
        "{} must be {}".format(f, str(ALL_FIELDS[f]))
        for f in definition
        if f in ALL_FIELDS and not isinstance(definition[f], ALL_FIELDS[f])
    ]
    if wrong_type_names:
        ParserError(
            self._name
            + " - "
            + ", ".join(wrong_type_names)
            + ".\nSee: {}#required-fields".format(BASE_DOC_URL)
        ).handle_later()

    # Check that the email addresses don't contain spaces or commas. A missing
    # or non-list field and non-string entries are reported by the other
    # checks, so they are skipped here instead of failing on iteration.
    notification_emails = definition.get("notification_emails")
    if isinstance(notification_emails, list):
        for notification_email in notification_emails:
            if not isinstance(notification_email, str):
                continue
            if not validate_notification_email(notification_email):
                ParserError(
                    self._name
                    + " - invalid email address: "
                    + notification_email
                    + ".\nSee: {}".format(BASE_DOC_URL)
                ).handle_later()

    # Check that the lists are not empty and that data in the lists
    # have the correct types. Only known list fields have a content type.
    list_fields = [
        f
        for f in definition
        if f in LIST_FIELDS_CONTENT and isinstance(definition[f], list)
    ]
    for field in list_fields:
        # Check for empty lists.
        if len(definition[field]) == 0:
            ParserError(
                (
                    "Field '{}' for probe '{}' must not be empty"
                    + ".\nSee: {}#required-fields)"
                ).format(field, self._name, BASE_DOC_URL)
            ).handle_later()
        # Check the type of the list content.
        broken_types = [
            not isinstance(v, LIST_FIELDS_CONTENT[field]) for v in definition[field]
        ]
        if any(broken_types):
            ParserError(
                (
                    "Field '{}' for probe '{}' must only contain values of type {}"
                    ".\nSee: {}#the-yaml-definition-file)"
                ).format(
                    field,
                    self._name,
                    str(LIST_FIELDS_CONTENT[field]),
                    BASE_DOC_URL,
                )
            ).handle_later()

    # Check that keys are only added to keyed scalars and that their values are valid
    MAX_KEY_COUNT = 100
    MAX_KEY_LENGTH = 72
    keys = definition.get("keys")
    if keys is not None:
        if not definition.get("keyed", False):
            ParserError(
                self._name
                + " - invalid field: "
                + "\n`keys` field only valid for keyed scalars"
            ).handle_later()

        # The count and length limits only make sense for a list; a wrongly
        # typed `keys` has already been reported by the type check above.
        if isinstance(keys, list):
            if len(keys) > MAX_KEY_COUNT:
                ParserError(
                    self._name
                    + " - exceeding key count: "
                    + "\n`keys` values count must not exceed {}".format(MAX_KEY_COUNT)
                ).handle_later()

            invalid = [
                k for k in keys if isinstance(k, str) and len(k) > MAX_KEY_LENGTH
            ]
            if invalid:
                ParserError(
                    self._name
                    + " - invalid key value"
                    + "\n `keys` values are exceeding length {}:".format(MAX_KEY_LENGTH)
                    + ", ".join(invalid)
                ).handle_later()
+ collection_policy = definition.get("release_channel_collection", None) + if collection_policy and collection_policy not in ["opt-in", "opt-out"]: + ParserError( + self._name + + " - unknown collection policy: " + + collection_policy + + ".\nSee: {}#optional-fields".format(BASE_DOC_URL) + ).handle_later() + + # Validate operating_systems. + operating_systems = definition.get("operating_systems", []) + for operating_system in operating_systems: + if not utils.is_valid_os(operating_system): + ParserError( + self._name + + " - invalid entry in operating_systems: " + + operating_system + + ".\nSee: {}#optional-fields".format(BASE_DOC_URL) + ).handle_later() + + # Validate record_in_processes. + record_in_processes = definition.get("record_in_processes", []) + for proc in record_in_processes: + if not utils.is_valid_process_name(proc): + ParserError( + self._name + + " - unknown value in record_in_processes: " + + proc + + ".\nSee: {}".format(BASE_DOC_URL) + ).handle_later() + + # Validate product. + products = definition.get("products", []) + for product in products: + if not utils.is_valid_product(product): + ParserError( + self._name + + " - unknown value in products: " + + product + + ".\nSee: {}".format(BASE_DOC_URL) + ).handle_later() + if utils.is_geckoview_streaming_product(product): + keyed = definition.get("keyed") + if keyed: + ParserError( + "%s - keyed Scalars not supported for product %s" + % (self._name, product) + ).handle_later() + + # Validate the expiration version. + # Historical versions of Scalars.json may contain expiration versions + # using the deprecated format 'N.Na1'. Those scripts set + # self._strict_type_checks to false. 
+ expires = definition.get("expires") + if not utils.validate_expiration_version(expires) and self._strict_type_checks: + ParserError( + "{} - invalid expires: {}.\nSee: {}#required-fields".format( + self._name, expires, BASE_DOC_URL + ) + ).handle_later() + + @property + def category(self): + """Get the category name""" + return self._category_name + + @property + def name(self): + """Get the scalar name""" + return self._name + + @property + def label(self): + """Get the scalar label generated from the scalar and category names.""" + return self._category_name + "." + self._name + + @property + def enum_label(self): + """Get the enum label generated from the scalar and category names. This is used to + generate the enum tables.""" + + # The scalar name can contain informations about its hierarchy (e.g. 'a.b.scalar'). + # We can't have dots in C++ enums, replace them with an underscore. Also, make the + # label upper case for consistency with the histogram enums. + return self.label.replace(".", "_").upper() + + @property + def bug_numbers(self): + """Get the list of related bug numbers""" + return self._definition["bug_numbers"] + + @property + def description(self): + """Get the scalar description""" + return self._definition["description"] + + @property + def expires(self): + """Get the scalar expiration""" + return self._expires + + @property + def kind(self): + """Get the scalar kind""" + return self._definition["kind"] + + @property + def keys(self): + """Get the allowed keys for this scalar or [] if there aren't any'""" + return self._definition.get("keys", []) + + @property + def keyed(self): + """Boolean indicating whether this is a keyed scalar""" + return self._definition.get("keyed", False) + + @property + def nsITelemetry_kind(self): + """Get the scalar kind constant defined in nsITelemetry""" + return SCALAR_TYPES_MAP.get(self.kind) + + @property + def notification_emails(self): + """Get the list of notification emails""" + return 
self._definition["notification_emails"] + + @property + def record_in_processes(self): + """Get the non-empty list of processes to record data in""" + # Before we added content process support in bug 1278556, we only recorded in the + # main process. + return self._definition.get("record_in_processes", ["main"]) + + @property + def record_in_processes_enum(self): + """Get the non-empty list of flags representing the processes to record data in""" + return [utils.process_name_to_enum(p) for p in self.record_in_processes] + + @property + def products(self): + """Get the non-empty list of products to record data on""" + return self._definition.get("products") + + @property + def products_enum(self): + """Get the non-empty list of flags representing products to record data on""" + return [utils.product_name_to_enum(p) for p in self.products] + + @property + def dataset(self): + """Get the nsITelemetry constant equivalent to the chosen release channel collection + policy for the scalar. + """ + rcc = self.dataset_short + table = { + "opt-in": "DATASET_PRERELEASE_CHANNELS", + "opt-out": "DATASET_ALL_CHANNELS", + } + return "nsITelemetry::" + table[rcc] + + @property + def dataset_short(self): + """Get the short name of the chosen release channel collection policy for the scalar.""" + # The collection policy is optional, but we still define a default + # behaviour for it. 
+ return self._definition.get("release_channel_collection", "opt-in") + + @property + def operating_systems(self): + """Get the list of operating systems to record data on""" + return self._definition.get("operating_systems", ["all"]) + + def record_on_os(self, target_os): + """Check if this probe should be recorded on the passed os.""" + os = self.operating_systems + if "all" in os: + return True + + canonical_os = utils.canonical_os(target_os) + + if "unix" in os and canonical_os in utils.UNIX_LIKE_OS: + return True + + return canonical_os in os + + @property + def record_into_store(self): + """Get the list of stores this probe should be recorded into""" + return self._definition.get("record_into_store", ["main"]) + + +def load_scalars(filename, strict_type_checks=True): + """Parses a YAML file containing the scalar definition. + + :param filename: the YAML file containing the scalars definition. + :raises ParserError: if the scalar file cannot be opened or parsed. + """ + + # Parse the scalar definitions from the YAML file. + scalars = None + try: + with io.open(filename, "r", encoding="utf-8") as f: + scalars = yaml.safe_load(f) + except IOError as e: + ParserError("Error opening " + filename + ": " + str(e)).handle_now() + except ValueError as e: + ParserError( + "Error parsing scalars in {}: {}" + ".\nSee: {}".format(filename, e, BASE_DOC_URL) + ).handle_now() + + scalar_list = [] + + # Scalars are defined in a fixed two-level hierarchy within the definition file. + # The first level contains the category name, while the second level contains the + # probe name (e.g. "category.name: probe: ..."). + for category_name in sorted(scalars): + category = scalars[category_name] + + # Make sure that the category has at least one probe in it. 
+ if not category or len(category) == 0: + ParserError( + 'Category "{}" must have at least one probe in it' + ".\nSee: {}".format(category_name, BASE_DOC_URL) + ).handle_later() + + for probe_name in sorted(category): + # We found a scalar type. Go ahead and parse it. + scalar_info = category[probe_name] + scalar_list.append( + ScalarType(category_name, probe_name, scalar_info, strict_type_checks) + ) + + return scalar_list diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/parse_user_interactions.py b/toolkit/components/telemetry/build_scripts/mozparsers/parse_user_interactions.py new file mode 100644 index 0000000000..6863d67ec4 --- /dev/null +++ b/toolkit/components/telemetry/build_scripts/mozparsers/parse_user_interactions.py @@ -0,0 +1,256 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import atexit +import io +import re + +import yaml + +from .shared_telemetry_utils import ParserError + +atexit.register(ParserError.exit_func) + +BASE_DOC_URL = ( + "https://firefox-source-docs.mozilla.org/toolkit/components/" + + "telemetry/telemetry/collection/user_interactions.html" +) + + +class UserInteractionType: + """A class for representing a UserInteraction definition.""" + + def __init__(self, category_name, user_interaction_name, definition): + # Validate and set the name, so we don't need to pass it to the other + # validation functions. + self.validate_names(category_name, user_interaction_name) + self._name = user_interaction_name + self._category_name = category_name + + # Validating the UserInteraction definition. + self.validate_types(definition) + + # Everything is ok, set the rest of the data. 
+ self._definition = definition + + def validate_names(self, category_name, user_interaction_name): + """Validate the category and UserInteraction name: + - Category name must be alpha-numeric + '.', no leading/trailing digit or '.'. + - UserInteraction name must be alpha-numeric + '_', no leading/trailing digit or '_'. + + :param category_name: the name of the category the UserInteraction is in. + :param user_interaction_name: the name of the UserInteraction. + :raises ParserError: if the length of the names exceeds the limit or they don't + conform our name specification. + """ + + # Enforce a maximum length on category and UserInteraction names. + MAX_NAME_LENGTH = 40 + for n in [category_name, user_interaction_name]: + if len(n) > MAX_NAME_LENGTH: + ParserError( + ( + "Name '{}' exceeds maximum name length of {} characters.\n" + "See: {}#the-yaml-definition-file" + ).format(n, MAX_NAME_LENGTH, BASE_DOC_URL) + ).handle_later() + + def check_name(name, error_msg_prefix, allowed_char_regexp): + # Check if we only have the allowed characters. + chars_regxp = r"^[a-zA-Z0-9" + allowed_char_regexp + r"]+$" + if not re.search(chars_regxp, name): + ParserError( + ( + error_msg_prefix + " name must be alpha-numeric. Got: '{}'.\n" + "See: {}#the-yaml-definition-file" + ).format(name, BASE_DOC_URL) + ).handle_later() + + # Don't allow leading/trailing digits, '.' or '_'. + if re.search(r"(^[\d\._])|([\d\._])$", name): + ParserError( + ( + error_msg_prefix + " name must not have a leading/trailing " + "digit, a dot or underscore. Got: '{}'.\n" + " See: {}#the-yaml-definition-file" + ).format(name, BASE_DOC_URL) + ).handle_later() + + check_name(category_name, "Category", r"\.") + check_name(user_interaction_name, "UserInteraction", r"_") + + def validate_types(self, definition): + """This function performs some basic sanity checks on the UserInteraction + definition: + - Checks that all the required fields are available. 
+ - Checks that all the fields have the expected types. + + :param definition: the dictionary containing the UserInteraction + properties. + :raises ParserError: if a UserInteraction definition field is of the + wrong type. + :raises ParserError: if a required field is missing or unknown fields are present. + """ + + # The required and optional fields in a UserInteraction definition. + REQUIRED_FIELDS = { + "bug_numbers": list, # This contains ints. See LIST_FIELDS_CONTENT. + "description": str, + } + + # The types for the data within the fields that hold lists. + LIST_FIELDS_CONTENT = { + "bug_numbers": int, + } + + ALL_FIELDS = REQUIRED_FIELDS.copy() + + # Checks that all the required fields are available. + missing_fields = [f for f in REQUIRED_FIELDS.keys() if f not in definition] + if len(missing_fields) > 0: + ParserError( + self._name + + " - missing required fields: " + + ", ".join(missing_fields) + + ".\nSee: {}#required-fields".format(BASE_DOC_URL) + ).handle_later() + + # Do we have any unknown field? + unknown_fields = [f for f in definition.keys() if f not in ALL_FIELDS] + if len(unknown_fields) > 0: + ParserError( + self._name + + " - unknown fields: " + + ", ".join(unknown_fields) + + ".\nSee: {}#required-fields".format(BASE_DOC_URL) + ).handle_later() + + # Checks the type for all the fields. + wrong_type_names = [ + "{} must be {}".format(f, str(ALL_FIELDS[f])) + for f in definition.keys() + if not isinstance(definition[f], ALL_FIELDS[f]) + ] + if len(wrong_type_names) > 0: + ParserError( + self._name + + " - " + + ", ".join(wrong_type_names) + + ".\nSee: {}#required-fields".format(BASE_DOC_URL) + ).handle_later() + + # Check that the lists are not empty and that data in the lists + # have the correct types. + list_fields = [f for f in definition if isinstance(definition[f], list)] + for field in list_fields: + # Check for empty lists. 
+ if len(definition[field]) == 0: + ParserError( + ( + "Field '{}' for probe '{}' must not be empty" + + ".\nSee: {}#required-fields)" + ).format(field, self._name, BASE_DOC_URL) + ).handle_later() + # Check the type of the list content. + broken_types = [ + not isinstance(v, LIST_FIELDS_CONTENT[field]) for v in definition[field] + ] + if any(broken_types): + ParserError( + ( + "Field '{}' for probe '{}' must only contain values of type {}" + ".\nSee: {}#the-yaml-definition-file)" + ).format( + field, + self._name, + str(LIST_FIELDS_CONTENT[field]), + BASE_DOC_URL, + ) + ).handle_later() + + @property + def category(self): + """Get the category name""" + return self._category_name + + @property + def name(self): + """Get the UserInteraction name""" + return self._name + + @property + def label(self): + """Get the UserInteraction label generated from the UserInteraction + and category names. + """ + return self._category_name + "." + self._name + + @property + def bug_numbers(self): + """Get the list of related bug numbers""" + return self._definition["bug_numbers"] + + @property + def description(self): + """Get the UserInteraction description""" + return self._definition["description"] + + +def load_user_interactions(filename): + """Parses a YAML file containing the UserInteraction definition. + + :param filename: the YAML file containing the UserInteraction definition. + :raises ParserError: if the UserInteraction file cannot be opened or + parsed. + """ + + # Parse the UserInteraction definitions from the YAML file. 
+ user_interactions = None + try: + with io.open(filename, "r", encoding="utf-8") as f: + user_interactions = yaml.safe_load(f) + except IOError as e: + ParserError("Error opening " + filename + ": " + str(e)).handle_now() + except ValueError as e: + ParserError( + "Error parsing UserInteractions in {}: {}" + ".\nSee: {}".format(filename, e, BASE_DOC_URL) + ).handle_now() + + user_interaction_list = [] + + # UserInteractions are defined in a fixed two-level hierarchy within the + # definition file. The first level contains the category name, while the + # second level contains the UserInteraction name + # (e.g. "category.name: user.interaction: ..."). + for category_name in sorted(user_interactions): + category = user_interactions[category_name] + + # Make sure that the category has at least one UserInteraction in it. + if not category or len(category) == 0: + ParserError( + 'Category "{}" must have at least one UserInteraction in it' + ".\nSee: {}".format(category_name, BASE_DOC_URL) + ).handle_later() + + for user_interaction_name in sorted(category): + # We found a UserInteraction type. Go ahead and parse it. 
+ user_interaction_info = category[user_interaction_name] + user_interaction_list.append( + UserInteractionType( + category_name, user_interaction_name, user_interaction_info + ) + ) + + return user_interaction_list + + +def from_files(filenames): + all_user_interactions = [] + + for filename in filenames: + all_user_interactions += load_user_interactions(filename) + + for user_interaction in all_user_interactions: + yield user_interaction diff --git a/toolkit/components/telemetry/build_scripts/mozparsers/shared_telemetry_utils.py b/toolkit/components/telemetry/build_scripts/mozparsers/shared_telemetry_utils.py new file mode 100644 index 0000000000..4b4cc9f685 --- /dev/null +++ b/toolkit/components/telemetry/build_scripts/mozparsers/shared_telemetry_utils.py @@ -0,0 +1,185 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This file contains utility functions shared by the scalars and the histogram generation +# scripts. + +import os +import re +import sys + +import yaml + +# This is a list of flags that determine which process a measurement is allowed +# to record from. 
# Maps the process names accepted in probe definitions to the suffix of the
# matching C++ enum value (see PROCESS_ENUM_PREFIX).
KNOWN_PROCESS_FLAGS = {
    "all": "All",
    "all_children": "AllChildren",
    "main": "Main",
    "content": "Content",
    "gpu": "Gpu",
    "rdd": "Rdd",
    "socket": "Socket",
    "utility": "Utility",
    # Historical Values
    "all_childs": "AllChildren",  # Supporting files from before bug 1363725
}

GECKOVIEW_STREAMING_PRODUCT = "geckoview_streaming"

# Maps the product names accepted in probe definitions to the suffix of the
# matching C++ enum value (see PRODUCT_ENUM_PREFIX).
SUPPORTED_PRODUCTS = {
    "firefox": "Firefox",
    "fennec": "Fennec",
    GECKOVIEW_STREAMING_PRODUCT: "GeckoviewStreaming",
    "thunderbird": "Thunderbird",
    # Historical, deprecated values:
    # 'geckoview': 'Geckoview',
}

SUPPORTED_OPERATING_SYSTEMS = [
    "mac",
    "linux",
    "windows",
    "android",
    "unix",
    "all",
]

# mozinfo identifies linux, BSD variants, Solaris and SunOS as unix
# Solaris and SunOS are identified as "unix" OS.
UNIX_LIKE_OS = [
    "unix",
    "linux",
    "bsd",
]

# Keys are lower-cased OS_TARGET names (see canonical_os()).
CANONICAL_OPERATING_SYSTEMS = {
    "darwin": "mac",
    "linux": "linux",
    "winnt": "windows",
    "android": "android",
    # for simplicity we treat all BSD and Solaris systems as unix
    "gnu/kfreebsd": "unix",
    "sunos": "unix",
    "dragonfly": "unix",
    "freebsd": "unix",
    "netbsd": "unix",
    "openbsd": "unix",
    # NOTE(review): the three keys below look like the product of a
    # "bsd" -> "unix" search/replace. They are kept so that any input that
    # used to resolve still does.
    "freeunix": "unix",
    "netunix": "unix",
    "openunix": "unix",
}

PROCESS_ENUM_PREFIX = "mozilla::Telemetry::Common::RecordedProcessType::"
PRODUCT_ENUM_PREFIX = "mozilla::Telemetry::Common::SupportedProduct::"


class ParserError(Exception):
    """Thrown by different probe parsers. Errors are partitioned into
    'immediately fatal' and 'eventually fatal' so that the parser can print
    multiple error messages at a time. See bug 1401612 ."""

    # Errors queued by handle_later(). Shared by all instances (class attribute).
    eventual_errors = []

    def __init__(self, *args):
        Exception.__init__(self, *args)

    def handle_later(self):
        """Queue this error so that it is reported later on."""
        ParserError.eventual_errors.append(self)

    def handle_now(self):
        """Print every queued error and this one to stderr, then terminate
        the process with exit status 1."""
        ParserError.print_eventuals()
        print(str(self), file=sys.stderr)
        # os._exit() does not flush stdio buffers, so do it explicitly.
        sys.stderr.flush()
        os._exit(1)

    @classmethod
    def print_eventuals(cls):
        """Print the queued errors to stderr, oldest first, emptying the queue."""
        while cls.eventual_errors:
            print(str(cls.eventual_errors.pop(0)), file=sys.stderr)

    @classmethod
    def exit_func(cls):
        """Fail the process if errors are still queued; a no-op otherwise."""
        if cls.eventual_errors:
            cls("Some errors occurred").handle_now()


def is_valid_process_name(name):
    """Check whether `name` is a key of KNOWN_PROCESS_FLAGS."""
    return name in KNOWN_PROCESS_FLAGS


def process_name_to_enum(name):
    """Translate a process name to its fully qualified C++ enum value.

    :param name: one of the keys of KNOWN_PROCESS_FLAGS.
    :raises ParserError: if the process name is not known. This mirrors
                         product_name_to_enum() instead of failing with a
                         TypeError while concatenating None.
    """
    if not is_valid_process_name(name):
        raise ParserError("Invalid process name {}".format(name))
    return PROCESS_ENUM_PREFIX + KNOWN_PROCESS_FLAGS[name]


def is_valid_product(name):
    """Check whether `name` is a key of SUPPORTED_PRODUCTS."""
    return name in SUPPORTED_PRODUCTS


def is_geckoview_streaming_product(name):
    """Check whether `name` is the GeckoView streaming product."""
    return name == GECKOVIEW_STREAMING_PRODUCT


def is_valid_os(name):
    """Check whether `name` is listed in SUPPORTED_OPERATING_SYSTEMS."""
    return name in SUPPORTED_OPERATING_SYSTEMS


def canonical_os(os):
    """Translate possible OS_TARGET names to their canonical value.

    The lookup is case-insensitive; unrecognized names give "unknown".
    """

    return CANONICAL_OPERATING_SYSTEMS.get(os.lower()) or "unknown"


def product_name_to_enum(product):
    """Translate a product name to its fully qualified C++ enum value.

    :param product: one of the keys of SUPPORTED_PRODUCTS.
    :raises ParserError: if the product is not supported.
    """
    if not is_valid_product(product):
        raise ParserError("Invalid product {}".format(product))
    return PRODUCT_ENUM_PREFIX + SUPPORTED_PRODUCTS.get(product)


def static_assert(output, expression, message):
    """Writes a C++ compile-time assertion expression to a file.
    :param output: the output stream.
    :param expression: the expression to check.
    :param message: the string literal that will appear if the expression evaluates to
                    false.
    """
    print('static_assert(%s, "%s");' % (expression, message), file=output)


def validate_expiration_version(expiration):
    """Makes sure the expiration version has the expected format.

    Allowed examples: "10", "20", "60", "never"
    Disallowed examples: "Never", "asd", "4000000", "60a1", "30.5a1"

    Values that are not strings (e.g. None for a missing field, or an int
    coming from an unquoted YAML number) are rejected rather than raising.

    :param expiration: the expiration version string.
    :return: True if the expiration validates correctly, False otherwise.
    """
    if not isinstance(expiration, str):
        return False

    if expiration == "never":
        return True

    # fullmatch, unlike a "$"-anchored match, also rejects a trailing newline.
    return re.fullmatch(r"\d{1,3}", expiration) is not None


def add_expiration_postfix(expiration):
    """Formats the expiration version and adds a version postfix if needed.

    :param expiration: the expiration version string.
    :return: the modified expiration string.
    """
    if re.match(r"^[1-9][0-9]*$", expiration):
        return expiration + ".0a1"

    if re.match(r"^[1-9][0-9]*\.0$", expiration):
        return expiration + "a1"

    return expiration


def load_yaml_file(filename):
    """Load a YAML file from disk, throw a ParserError on failure.

    :param filename: the path of the YAML file, read as UTF-8.
    :return: the document as returned by yaml.safe_load().
    :raises ParserError: if the file cannot be opened, decoded or parsed.
    """
    try:
        with open(filename, "r", encoding="utf-8") as f:
            return yaml.safe_load(f)
    except IOError as e:
        raise ParserError("Error opening " + filename + ": " + str(e)) from e
    except (ValueError, yaml.YAMLError) as e:
        # PyYAML reports malformed documents with yaml.YAMLError, which is not
        # a ValueError; ValueError still covers decoding failures.
        raise ParserError(
            "Error parsing processes in {}: {}".format(filename, e)
        ) from e


# diff --git a/toolkit/components/telemetry/build_scripts/run_glean_parser.py b/toolkit/components/telemetry/build_scripts/run_glean_parser.py
# new file mode 100644
# index 0000000000..e71206e9b0
# --- /dev/null
# +++ b/toolkit/components/telemetry/build_scripts/run_glean_parser.py
# @@ -0,0 +1,17 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import sys
from pathlib import Path

from glean_parser import lint


def main(output, *filenames):
    """Lint the given metric definition files with glean_parser.

    Exits the process with status 1 when lint.glinter() returns a truthy
    value (presumably meaning that lint problems were found; confirm against
    the glean_parser documentation).

    :param output: unused; the script entry point passes sys.stdout here.
    :param filenames: the paths of the files to lint.
    """
    if lint.glinter([Path(x) for x in filenames], {"allow_reserved": False}):
        sys.exit(1)


if __name__ == "__main__":
    main(sys.stdout, *sys.argv[1:])


# diff --git a/toolkit/components/telemetry/build_scripts/setup.py b/toolkit/components/telemetry/build_scripts/setup.py
# new file mode 100644
# index 0000000000..bd8967aec5
# --- /dev/null
# +++ b/toolkit/components/telemetry/build_scripts/setup.py
# @@ -0,0 +1,32 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from setuptools import find_packages, setup

VERSION = "1.0.0"

# Read the long description with an explicit encoding so that the result does
# not depend on the locale of the machine building the package.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setup(
    author="Mozilla Telemetry Team",
    author_email="telemetry-client-dev@mozilla.com",
    url=(
        "https://firefox-source-docs.mozilla.org/"
        "toolkit/components/telemetry/telemetry/collection/index.html"
    ),
    name="mozparsers",
    description="Shared parsers for the Telemetry probe registries.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    license="MPL 2.0",
    packages=find_packages(),
    version=VERSION,
    classifiers=[
        "Topic :: Software Development :: Build Tools",
        "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
        # NOTE(review): the parsers call print(..., file=...) without a
        # __future__ import, which is Python 3 syntax; confirm whether this
        # classifier should be updated.
        "Programming Language :: Python :: 2.7",
    ],
    keywords=["mozilla", "telemetry", "parsers"],
)