1 files changed, 456 insertions, 0 deletions
diff --git a/browser/components/newtab/content-src/asrouter/schemas/make-schemas.py b/browser/components/newtab/content-src/asrouter/schemas/make-schemas.py
new file mode 100755
index 0000000000..91e6c2a9db
--- /dev/null
+++ b/browser/components/newtab/content-src/asrouter/schemas/make-schemas.py
@@ -0,0 +1,456 @@
+#!/usr/bin/env python3
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Firefox Messaging System Messaging Experiment schema generator
+
+The Firefox Messaging System handles several types of messages. This program
+patches and combines those schemas into a single schema
+(MessagingExperiment.schema.json) which is used to validate messaging
+experiments coming from Nimbus.
+
+Definitions from FxMsCommon.schema.json are bundled into this schema. This
+allows all of the FxMS schemas to reference common definitions, e.g.
+`localizableText` for translatable strings, via referencing the common schema.
+The bundled schema will be re-written so that the references now point at the
+top-level, generated schema.
+
+Additionally, all self-references in each messaging schema will be rewritten
+into absolute references, referencing each sub-schemas `$id`. This is requried
+due to the JSONSchema validation library used by Experimenter not fully
+supporting self-references and bundled schema.
+"""
+
+import json
+import sys
+from argparse import ArgumentParser
+from itertools import chain
+from pathlib import Path
+from typing import Any, Dict, List, NamedTuple, Union
+from urllib.parse import urlparse
+
+import jsonschema
+
+
+class SchemaDefinition(NamedTuple):
+    """A definition of a schema that is to be bundled."""
+
+    #: The $id of the generated schema.
+    schema_id: str
+
+    #: The path of the generated schema.
+    schema_path: Path
+
+    #: The message types that will be bundled into the schema.
+    message_types: Dict[str, Path]
+
+    #: What common definitions to bundle into the schema.
+    #:
+    #: If `True`, all definitions will be bundled.
+    #: If `False`, no definitons will be bundled.
+    #: If a list, only the named definitions will be bundled.
+    bundle_common: Union[bool, List[str]]
+
+    #: The testing corpus for the schema.
+    test_corpus: Dict[str, Path]
+
+
+SCHEMA_DIR = Path("..", "templates")
+
+SCHEMAS = [
+    SchemaDefinition(
+        schema_id="resource://activity-stream/schemas/MessagingExperiment.schema.json",
+        schema_path=Path("MessagingExperiment.schema.json"),
+        message_types={
+            "CFRUrlbarChiclet": (
+                SCHEMA_DIR / "CFR" / "templates" / "CFRUrlbarChiclet.schema.json"
+            ),
+            "ExtensionDoorhanger": (
+                SCHEMA_DIR / "CFR" / "templates" / "ExtensionDoorhanger.schema.json"
+            ),
+            "InfoBar": SCHEMA_DIR / "CFR" / "templates" / "InfoBar.schema.json",
+            "NewtabPromoMessage": (
+                SCHEMA_DIR / "PBNewtab" / "NewtabPromoMessage.schema.json"
+            ),
+            "ProtectionsPanelMessage": (
+                SCHEMA_DIR / "OnboardingMessage" / "ProtectionsPanelMessage.schema.json"
+            ),
+            "Spotlight": SCHEMA_DIR / "OnboardingMessage" / "Spotlight.schema.json",
+            "ToastNotification": (
+                SCHEMA_DIR / "ToastNotification" / "ToastNotification.schema.json"
+            ),
+            "ToolbarBadgeMessage": (
+                SCHEMA_DIR / "OnboardingMessage" / "ToolbarBadgeMessage.schema.json"
+            ),
+            "UpdateAction": (
+                SCHEMA_DIR / "OnboardingMessage" / "UpdateAction.schema.json"
+            ),
+            "WhatsNewMessage": (
+                SCHEMA_DIR / "OnboardingMessage" / "WhatsNewMessage.schema.json"
+            ),
+        },
+        bundle_common=True,
+        # These are generated via extract-test-corpus.js
+        test_corpus={
+            "CFRMessageProvider": Path("corpus", "CFRMessageProvider.messages.json"),
+            "OnboardingMessageProvider": Path(
+                "corpus", "OnboardingMessageProvider.messages.json"
+            ),
+            "PanelTestProvider": Path("corpus", "PanelTestProvider.messages.json"),
+        },
+    ),
+    SchemaDefinition(
+        schema_id=(
+            "resource://activity-stream/schemas/"
+            "BackgroundTaskMessagingExperiment.schema.json"
+        ),
+        schema_path=Path("BackgroundTaskMessagingExperiment.schema.json"),
+        message_types={
+            "ToastNotification": (
+                SCHEMA_DIR / "ToastNotification" / "ToastNotification.schema.json"
+            ),
+        },
+        bundle_common=True,
+        # These are generated via extract-test-corpus.js
+        test_corpus={
+            # Just the "toast_notification" messages.
+            "PanelTestProvider": Path(
+                "corpus", "PanelTestProvider_toast_notification.messages.json"
+            ),
+        },
+    ),
+]
+
+COMMON_SCHEMA_NAME = "FxMSCommon.schema.json"
+COMMON_SCHEMA_PATH = Path(COMMON_SCHEMA_NAME)
+
+
+class NestedRefResolver(jsonschema.RefResolver):
+    """A custom ref resolver that handles bundled schema.
+
+    This is the resolver used by Experimenter.
+    """
+
+    def __init__(self, schema):
+        super().__init__(base_uri=None, referrer=None)
+
+        if "$id" in schema:
+            self.store[schema["$id"]] = schema
+
+        if "$defs" in schema:
+            for dfn in schema["$defs"].values():
+                if "$id" in dfn:
+                    self.store[dfn["$id"]] = dfn
+
+
+def read_schema(path):
+    """Read a schema from disk and parse it as JSON."""
+    with path.open("r") as f:
+        return json.load(f)
+
+
+def extract_template_values(template):
+    """Extract the possible template values (either via JSON Schema enum or const)."""
+    enum = template.get("enum")
+    if enum:
+        return enum
+
+    const = template.get("const")
+    if const:
+        return [const]
+
+
+def patch_schema(schema, bundled_id, schema_id=None):
+    """Patch the given schema.
+
+    The JSON schema validator that Experimenter uses
+    (https://pypi.org/project/jsonschema/) does not support relative references,
+    nor does it support bundled schemas. We rewrite the schema so that all
+    relative refs are transformed into absolute refs via the schema's `$id`.
+
+    Additionally, we merge in the contents of FxMSCommon.schema.json, so all
+    refs relative to that schema will be transformed to become relative to this
+    schema.
+
+    See-also: https://github.com/python-jsonschema/jsonschema/issues/313
+    """
+    if schema_id is None:
+        schema_id = schema["$id"]
+
+    def patch_impl(schema):
+        ref = schema.get("$ref")
+
+        if ref:
+            uri = urlparse(ref)
+            if (
+                uri.scheme == ""
+                and uri.netloc == ""
+                and uri.path == ""
+                and uri.fragment != ""
+            ):
+                schema["$ref"] = f"{schema_id}#{uri.fragment}"
+            elif (uri.scheme, uri.path) == ("file", f"/{COMMON_SCHEMA_NAME}"):
+                schema["$ref"] = f"{bundled_id}#{uri.fragment}"
+
+        # If `schema` is object-like, inspect each of its indivual properties
+        # and patch them.
+        properties = schema.get("properties")
+        if properties:
+            for prop in properties.keys():
+                patch_impl(properties[prop])
+
+        # If `schema` is array-like, inspect each of its items and patch them.
+        items = schema.get("items")
+        if items:
+            patch_impl(items)
+
+        # Patch each `if`, `then`, `else`, and `not` sub-schema that is present.
+        for key in ("if", "then", "else", "not"):
+            if key in schema:
+                patch_impl(schema[key])
+
+        # Patch the items of each `oneOf`, `allOf`, and `anyOf` sub-schema that
+        # is present.
+        for key in ("oneOf", "allOf", "anyOf"):
+            subschema = schema.get(key)
+            if subschema:
+                for i, alternate in enumerate(subschema):
+                    patch_impl(alternate)
+
+    # Patch the top-level type defined in the schema.
+    patch_impl(schema)
+
+    # Patch each named definition in the schema.
+    for key in ("$defs", "definitions"):
+        defns = schema.get(key)
+        if defns:
+            for defn_name, defn_value in defns.items():
+                patch_impl(defn_value)
+
+    return schema
+
+
+def bundle_schema(schema_def: SchemaDefinition):
+    """Create a bundled schema based on the schema definition."""
+    # Patch each message type schema to resolve all self-references to be
+    # absolute and rewrite # references to FxMSCommon.schema.json to be relative
+    # to the new schema (because we are about to bundle its definitions).
+    defs = {
+        name: patch_schema(read_schema(path), bundled_id=schema_def.schema_id)
+        for name, path in schema_def.message_types.items()
+    }
+
+    # Bundle the definitions from FxMSCommon.schema.json into this schema.
+    if schema_def.bundle_common:
+
+        def dfn_filter(name):
+            if schema_def.bundle_common is True:
+                return True
+
+            return name in schema_def.bundle_common
+
+        common_schema = patch_schema(
+            read_schema(COMMON_SCHEMA_PATH),
+            bundled_id=schema_def.schema_id,
+            schema_id=schema_def.schema_id,
+        )
+
+        # patch_schema mutates the given schema, so we read a new copy in for
+        # each bundle operation.
+        defs.update(
+            {
+                name: dfn
+                for name, dfn in common_schema["$defs"].items()
+                if dfn_filter(name)
+            }
+        )
+
+    # Ensure all bundled schemas have an $id so that $refs inside the
+    # bundled schema work correctly (i.e, they will reference the subschema
+    # and not the bundle).
+    for name in schema_def.message_types.keys():
+        subschema = defs[name]
+        if "$id" not in subschema:
+            raise ValueError(f"Schema {name} is missing an $id")
+
+        props = subschema["properties"]
+        if "template" not in props:
+            raise ValueError(f"Schema {name} is missing a template")
+
+        template = props["template"]
+        if "enum" not in template and "const" not in template:
+            raise ValueError(f"Schema {name} should have const or enum template")
+
+    templates = {
+        name: extract_template_values(defs[name]["properties"]["template"])
+        for name in schema_def.message_types.keys()
+    }
+
+    # Ensure that each schema has a unique set of template values.
+    for a in templates.keys():
+        a_keys = set(templates[a])
+
+        for b in templates.keys():
+            if a == b:
+                continue
+
+            b_keys = set(templates[b])
+            intersection = a_keys.intersection(b_keys)
+
+            if len(intersection):
+                raise ValueError(
+                    f"Schema {a} and {b} have overlapping template values: "
+                    f"{', '.join(intersection)}"
+                )
+
+    all_templates = list(chain.from_iterable(templates.values()))
+
+    # Enforce that one of the templates must match (so that one of the if
+    # branches will match).
+    defs["Message"]["properties"]["template"]["enum"] = all_templates
+
+    # Generate the combined schema.
+    return {
+        "$schema": "https://json-schema.org/draft/2019-09/schema",
+        "$id": schema_def.schema_id,
+        "title": "Messaging Experiment",
+        "description": "A Firefox Messaging System message.",
+        # A message must be one of
+        # - an empty message (i.e., a completely empty object), which is the
+        #   equivalent of an experiment branch not providing a message; or
+        # - An object that contains a template field
+        "oneOf": [
+            {
+                "description": "An empty FxMS message.",
+                "type": "object",
+                "additionalProperties": False,
+            },
+            {
+                "allOf": [
+                    # Ensure each message has all the fields defined in the base
+                    # Message type.
+                    #
+                    # This is slightly redundant because each message should
+                    # already inherit from this message type, but it is easier
+                    # to add this requirement here than to verify that each
+                    # message's schema is properly inheriting.
+                    {"$ref": f"{schema_def.schema_id}#/$defs/Message"},
+                    # For each message type, create a subschema that says if the
+                    # template field matches a value for a message type defined
+                    # in MESSAGE_TYPES, then the message must also match the
+                    # schema for that message type.
+                    #
+                    # This is done using `allOf: [{ if, then }]` instead of `oneOf: []`
+                    # because it provides better error messages. Using `if-then`
+                    # will only show validation errors for the sub-schema that
+                    # matches template, whereas using `oneOf` will show
+                    # validation errors for *all* sub-schemas, which makes
+                    # debugging messages much harder.
+                    *(
+                        {
+                            "if": {
+                                "type": "object",
+                                "properties": {
+                                    "template": {
+                                        "type": "string",
+                                        "enum": templates[message_type],
+                                    },
+                                },
+                                "required": ["template"],
+                            },
+                            "then": {
+                                "$ref": f"{schema_def.schema_id}#/$defs/{message_type}"
+                            },
+                        }
+                        for message_type in schema_def.message_types
+                    ),
+                ],
+            },
+        ],
+        "$defs": defs,
+    }
+
+
+def check_diff(schema_def: SchemaDefinition, schema: Dict[str, Any]):
+    """Check the generated schema matches the on-disk schema."""
+    print(f"  Checking {schema_def.schema_path} for differences...")
+
+    with schema_def.schema_path.open("r") as f:
+        on_disk = json.load(f)
+
+    if on_disk != schema:
+        print(f"{schema_def.schema_path} does not match generated schema:")
+        print("Generated schema:")
+        json.dump(schema, sys.stdout, indent=2)
+        print("\n\nOn Disk schema:")
+        json.dump(on_disk, sys.stdout, indent=2)
+
+        raise ValueError("Schemas do not match!")
+
+
+def validate_corpus(schema_def: SchemaDefinition, schema: Dict[str, Any]):
+    """Check that the schema validates.
+
+    This uses the same validation configuration that is used in Experimenter.
+    """
+    print("  Validating messages with Experimenter JSON Schema validator...")
+
+    resolver = NestedRefResolver(schema)
+
+    for provider, provider_path in schema_def.test_corpus.items():
+        print(f"    Validating messages from {provider}:")
+
+        try:
+            with provider_path.open("r") as f:
+                messages = json.load(f)
+        except FileNotFoundError as e:
+            if not provider_path.parent.exists():
+                new_exc = Exception(
+                    f"Could not find {provider_path}: Did you run "
+                    "`mach xpcshell extract-test-corpus` ?"
+                )
+                raise new_exc from e
+
+            raise e
+
+        for message in messages:
+            template = message["template"]
+            msg_id = message["id"]
+
+            print(
+                f"      Validating {msg_id} {template} message with {schema_def.schema_path}..."
+            )
+            jsonschema.validate(instance=message, schema=schema, resolver=resolver)
+
+        print()
+
+
+def main(check=False):
+    """Generate Nimbus feature schemas for Firefox Messaging System."""
+    for schema_def in SCHEMAS:
+        print(f"Generating {schema_def.schema_path} ...")
+        schema = bundle_schema(schema_def)
+
+        if check:
+            print(f"Checking {schema_def.schema_path} ...")
+            check_diff(schema_def, schema)
+            validate_corpus(schema_def, schema)
+        else:
+            with schema_def.schema_path.open("wb") as f:
+                print(f"Writing {schema_def.schema_path} ...")
+                f.write(json.dumps(schema, indent=2).encode("utf-8"))
+                f.write(b"\n")
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description=main.__doc__)
+    parser.add_argument(
+        "--check",
+        action="store_true",
+        help="Check that the generated schemas have not changed and run validation tests.",
+        default=False,
+    )
+    args = parser.parse_args()
+
+    main(args.check)