1 files changed, 435 insertions, 0 deletions
diff --git a/third_party/python/glean_parser/glean_parser/metrics.py b/third_party/python/glean_parser/glean_parser/metrics.py
new file mode 100644
index 0000000000..6398938997
--- /dev/null
+++ b/third_party/python/glean_parser/glean_parser/metrics.py
@@ -0,0 +1,435 @@
+# -*- coding: utf-8 -*-
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Classes for each of the high-level metric types.
+"""
+
+import enum
+from typing import Any, Dict, List, Optional, Type, Union  # noqa
+
+
+from . import pings
+from . import tags
+from . import util
+
+
+# Important: if any of these values ever change, make sure to update
+# mozilla/glean as well; otherwise the language bindings there may break.
+# Member names are looked up from the `lifetime` string given to a Metric.
+class Lifetime(enum.Enum):
+    ping = 0
+    application = 1
+    user = 2
+
+
+class DataSensitivity(enum.Enum):  # names allowed in `data_sensitivity` lists
+    technical = 1
+    interaction = 2
+    web_activity = 3
+    highly_sensitive = 4
+
+
+class Metric:
+    """Base class of all metric types; subclasses register under `typename`."""
+
+    typename: str = "ERROR"
+    glean_internal_metric_cat: str = "glean.internal.metrics"
+    metric_types: Dict[str, Any] = {}
+    default_store_names: List[str] = ["metrics"]
+
+    def __init__(
+        self,
+        type: str,
+        category: str,
+        name: str,
+        bugs: List[str],
+        description: str,
+        notification_emails: List[str],
+        expires: Any,
+        metadata: Optional[Dict] = None,
+        data_reviews: Optional[List[str]] = None,
+        version: int = 0,
+        disabled: bool = False,
+        lifetime: str = "ping",
+        send_in_pings: Optional[List[str]] = None,
+        unit: Optional[str] = None,
+        gecko_datapoint: str = "",
+        no_lint: Optional[List[str]] = None,
+        data_sensitivity: Optional[List[str]] = None,
+        defined_in: Optional[Dict] = None,
+        telemetry_mirror: Optional[str] = None,
+        _config: Optional[Dict[str, Any]] = None,
+        _validated: bool = False,
+    ):
+        """Store the parameters; validate them unless `_validated` is set."""
+        # Imported here rather than at module level to avoid a cyclical import.
+        from . import parser
+
+        # Attributes are assigned in a fixed order: `serialize` walks
+        # `self.__dict__`, so this is also the order of the serialized keys.
+        self.type = type
+        self.category = category
+        self.name = name
+        self.bugs = bugs
+        self.description = description
+        self.notification_emails = notification_emails
+        self.expires = expires
+        self.metadata = {} if metadata is None else metadata
+        self.data_reviews = [] if data_reviews is None else data_reviews
+        self.version = version
+        self.disabled = disabled
+        self.lifetime = getattr(Lifetime, lifetime)
+        self.send_in_pings = ["default"] if send_in_pings is None else send_in_pings
+        if unit is not None:
+            self.unit = unit
+        self.gecko_datapoint = gecko_datapoint
+        self.no_lint = [] if no_lint is None else no_lint
+        if data_sensitivity is not None:
+            self.data_sensitivity = [
+                getattr(DataSensitivity, level) for level in data_sensitivity
+            ]
+        self.defined_in = defined_in
+        if telemetry_mirror is not None:
+            self.telemetry_mirror = telemetry_mirror
+
+        # `_validated` says whether this metric has already been jsonschema
+        # validated (that does not cover any of the Python-level validation).
+        if not _validated:
+            data = {
+                "$schema": parser.METRICS_ID,
+                self.category: {self.name: self._serialize_input()},
+            }  # type: Dict[str, util.JSONType]
+            for error in parser.validate(data):
+                raise ValueError(error)
+
+        # Store the config, but only after validation.
+        self._config = {} if _config is None else _config
+
+        # Metrics in the special category "glean.internal.metrics" need to have
+        # an empty category string when identifying the metrics in the ping.
+        if self.category == Metric.glean_internal_metric_cat:
+            self.category = ""
+
+    def __init_subclass__(cls, **kwargs):
+        """Record the new subclass in `Metric.metric_types` under its typename."""
+        # NOTE(review): the keys of `metric_types` are typename strings and
+        # `typename` is inherited from Metric, so both tests below look like
+        # they always pass -- confirm the intent before relying on them.
+        if cls not in Metric.metric_types and hasattr(cls, "typename"):
+            Metric.metric_types[cls.typename] = cls
+        super().__init_subclass__(**kwargs)
+
+    @classmethod
+    def make_metric(
+        cls,
+        category: str,
+        name: str,
+        metric_info: Dict[str, util.JSONType],
+        config: Optional[Dict[str, Any]] = None,
+        validated: bool = False,
+    ):
+        """
+        Build a metric instance from a metric_info dictionary from metrics.yaml.
+
+        :param: category The category the metric lives in
+        :param: name The name of the metric
+        :param: metric_info A dictionary of the remaining metric parameters
+        :param: config A dictionary containing commandline configuration
+            parameters
+        :param: validated True if the metric has already gone through
+            jsonschema validation
+        :return: A new instance of the class registered for the metric's type.
+        """
+        metric_type = metric_info["type"]
+        if not isinstance(metric_type, str):
+            raise TypeError(f"Unknown metric type {metric_type}")
+        metric_class = cls.metric_types[metric_type]
+        return metric_class(
+            category=category,
+            name=name,
+            defined_in=getattr(metric_info, "defined_in", None),
+            _validated=validated,
+            _config={} if config is None else config,
+            **metric_info,
+        )
+
+    def serialize(self) -> Dict[str, util.JSONType]:
+        """
+        Serialize to the JSON object model: enums by name, sets as sorted lists.
+        """
+        serialized: Dict[str, Any] = {}
+        for key, val in self.__dict__.items():
+            if isinstance(val, enum.Enum):
+                val = val.name
+            elif isinstance(val, set):
+                val = sorted(val)
+            elif isinstance(val, list) and val and isinstance(val[0], enum.Enum):
+                val = [member.name for member in val]
+            serialized[key] = val
+        del serialized["name"]
+        del serialized["category"]
+        serialized.pop("_config", None)
+        serialized.pop("_generate_enums", None)
+        return serialized
+
+    def _serialize_input(self) -> Dict[str, util.JSONType]:
+        """`serialize()` run through `util.remove_output_params`, for validation."""
+        return util.remove_output_params(self.serialize(), "defined_in")
+
+    def identifier(self) -> str:
+        """
+        Create an identifier unique for this metric.
+        Generally, category.name; however, Glean internal
+        metrics only use name.
+        """
+        if self.category:
+            return ".".join((self.category, self.name))
+        return self.name
+
+    def is_disabled(self) -> bool:
+        """Whether the metric is explicitly disabled or has expired."""
+        return self.disabled or self.is_expired()
+
+    def is_expired(self) -> bool:
+        """Apply the config's `custom_is_expired`, else `util.is_expired`."""
+
+        def default_handler(expires) -> bool:
+            return util.is_expired(expires, self._config.get("expire_by_version"))
+
+        check = self._config.get("custom_is_expired", default_handler)
+        return check(self.expires)
+
+    def validate_expires(self):
+        """Apply `custom_validate_expires`, else `util.validate_expires`."""
+
+        def default_handler(expires):
+            return util.validate_expires(expires, self._config.get("expire_by_version"))
+
+        validate = self._config.get("custom_validate_expires", default_handler)
+        return validate(self.expires)
+
+    def is_internal_metric(self) -> bool:
+        """True if the category is the Glean-internal one or the empty string."""
+        return self.category in (Metric.glean_internal_metric_cat, "")
+
+
+class Boolean(Metric):  # registered in Metric.metric_types as "boolean"
+    typename = "boolean"
+
+
+class String(Metric):  # registered in Metric.metric_types as "string"
+    typename = "string"
+
+
+class StringList(Metric):  # registered in Metric.metric_types as "string_list"
+    typename = "string_list"
+
+
+class Counter(Metric):  # registered in Metric.metric_types as "counter"
+    typename = "counter"
+
+
+class Quantity(Metric):  # registered in Metric.metric_types as "quantity"
+    typename = "quantity"
+
+
+class TimeUnit(enum.Enum):  # names accepted as a metric's `time_unit`
+    nanosecond = 0
+    microsecond = 1
+    millisecond = 2
+    second = 3
+    minute = 4
+    hour = 5
+    day = 6
+
+
+class TimeBase(Metric):  # shared base for metrics carrying a `time_unit`
+    def __init__(self, *args, **kwargs):
+        self.time_unit = getattr(TimeUnit, kwargs.pop("time_unit", "millisecond"))
+        super().__init__(*args, **kwargs)  # validates with time_unit set
+
+
+class Timespan(TimeBase):  # default time_unit: millisecond (from TimeBase)
+    typename = "timespan"
+
+
+class TimingDistribution(TimeBase):
+    typename = "timing_distribution"
+
+    def __init__(self, *args, **kwargs):  # default unit here is nanosecond
+        self.time_unit = getattr(TimeUnit, kwargs.pop("time_unit", "nanosecond"))
+        Metric.__init__(self, *args, **kwargs)  # bypasses TimeBase.__init__
+
+
+class MemoryUnit(enum.Enum):  # names accepted as a metric's `memory_unit`
+    byte = 0
+    kilobyte = 1
+    megabyte = 2
+    gigabyte = 3
+
+
+class MemoryDistribution(Metric):
+    typename = "memory_distribution"
+
+    def __init__(self, *args, **kwargs):  # `memory_unit` defaults to "byte"
+        self.memory_unit = getattr(MemoryUnit, kwargs.pop("memory_unit", "byte"))
+        super().__init__(*args, **kwargs)
+
+
+class HistogramType(enum.Enum):  # names accepted as `histogram_type`
+    linear = 0
+    exponential = 1
+
+
+class CustomDistribution(Metric):
+    typename = "custom_distribution"
+
+    def __init__(self, *args, **kwargs):
+        self.range_min = kwargs.pop("range_min", 1)
+        self.range_max = kwargs.pop("range_max")  # required: KeyError if absent
+        self.bucket_count = kwargs.pop("bucket_count")  # required as well
+        self.histogram_type = getattr(
+            HistogramType, kwargs.pop("histogram_type", "exponential")
+        )
+        super().__init__(*args, **kwargs)
+
+
+class Datetime(TimeBase):  # default time_unit: millisecond (from TimeBase)
+    typename = "datetime"
+
+
+class Event(Metric):
+    typename = "event"
+
+    default_store_names = ["events"]
+
+    def __init__(self, *args, **kwargs):
+        self.extra_keys = kwargs.pop("extra_keys", {})
+        self.validate_extra_keys(self.extra_keys, kwargs.get("_config"))  # may be None
+        super().__init__(*args, **kwargs)
+        self._generate_enums = [("allowed_extra_keys_with_types", "Extra")]
+
+    @property
+    def allowed_extra_keys(self):
+        # Sort keys so that output is deterministic
+        return sorted(self.extra_keys)
+
+    @property
+    def allowed_extra_keys_with_types(self):
+        # Sort by key so that output is deterministic; type defaults to "string"
+        pairs = [(k, v.get("type", "string")) for k, v in self.extra_keys.items()]
+        return sorted(pairs, key=lambda pair: pair[0])
+
+    @staticmethod
+    def validate_extra_keys(
+        extra_keys: Dict[str, Any], config: Optional[Dict[str, Any]]
+    ) -> None:
+        """Reject 'glean.' keys unless `allow_reserved` is set; config may be None."""
+        allow_reserved = (config or {}).get("allow_reserved")
+        if not allow_reserved and any(k.startswith("glean.") for k in extra_keys):
+            raise ValueError(
+                "Extra keys beginning with 'glean.' are reserved for "
+                "Glean internal use."
+            )
+
+
+class Uuid(Metric):  # registered in Metric.metric_types as "uuid"
+    typename = "uuid"
+
+
+class Url(Metric):  # registered in Metric.metric_types as "url"
+    typename = "url"
+
+
+class Jwe(Metric):  # removed type: constructing it always raises ValueError
+    typename = "jwe"
+
+    def __init__(self, *args, **kwargs):
+        raise ValueError(
+            "JWE support was removed. "
+            "If you require this send an email to glean-team@mozilla.com."
+        )
+
+
+class CowString(str):
+    """
+    Wrapper class for strings that should be represented as a
+    `Cow<'static, str>` in Rust, or `String` in other target languages.
+
+    This wraps `str`, so unless `CowString` is specifically handled it acts
+    (and serializes) as a string; it compares equal to plain strings too.
+    """
+
+    def __init__(self, val: str):
+        self.inner: str = val
+
+    def __eq__(self, other):
+        # Plain `str` (or any other object) has no `inner`: compare it as-is
+        # instead of failing with AttributeError.
+        return self.inner == getattr(other, "inner", other)
+
+    def __hash__(self):
+        return self.inner.__hash__()
+
+    def __lt__(self, other):
+        return self.inner.__lt__(getattr(other, "inner", other))
+
+
+class Labeled(Metric):
+    labeled = True
+
+    def __init__(self, *args, **kwargs):
+        # Keep the labels both in their given order and as a set of CowString.
+        labels = kwargs.pop("labels", None)
+        self.ordered_labels = labels
+        if labels is None:
+            self.labels = None
+        else:
+            self.labels = {CowString(label) for label in labels}
+        super().__init__(*args, **kwargs)
+
+    def serialize(self) -> Dict[str, util.JSONType]:
+        """
+        Serialize the metric, reporting `labels` in the order they were given.
+        """
+        serialized = super().serialize()
+        del serialized["ordered_labels"]
+        serialized["labels"] = self.ordered_labels
+        return serialized
+
+
+class LabeledBoolean(Labeled, Boolean):  # Labeled is first in the MRO
+    typename = "labeled_boolean"
+
+
+class LabeledString(Labeled, String):  # Labeled is first in the MRO
+    typename = "labeled_string"
+
+
+class LabeledCounter(Labeled, Counter):  # Labeled is first in the MRO
+    typename = "labeled_counter"
+
+
+class Rate(Metric):
+    typename = "rate"
+
+    def __init__(self, *args, **kwargs):  # `denominator_metric` is optional
+        self.denominator_metric = kwargs.pop("denominator_metric", None)
+        super().__init__(*args, **kwargs)
+
+
+class Denominator(Counter):
+    typename = "denominator"
+    # A denominator is a counter with an additional list of numerators.
+    numerators: List[Rate] = []  # NOTE: class-level list, shared until reassigned
+
+
+class Text(Metric):  # registered in Metric.metric_types as "text"
+    typename = "text"
+
+
+ObjectTree = Dict[str, Dict[str, Union[Metric, pings.Ping, tags.Tag]]]