summaryrefslogtreecommitdiffstats
path: root/browser/components/newtab/content-src/asrouter/schemas/make-schemas.py
blob: 7fbf815aa078e321717740a9d46119755d415a59 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Firefox Messaging System Messaging Experiment schema generator

The Firefox Messaging System handles several types of messages. This program
patches and combines those schemas into a single schema
(MessagingExperiment.schema.json) which is used to validate messaging
experiments coming from Nimbus.

Definitions from FxMSCommon.schema.json are bundled into this schema. This
allows all of the FxMS schemas to reference common definitions, e.g.
`localizableText` for translatable strings, via referencing the common schema.
The bundled schema will be re-written so that the references now point at the
top-level, generated schema.

Additionally, all self-references in each messaging schema will be rewritten
into absolute references, referencing each sub-schema's `$id`. This is required
due to the JSONSchema validation library used by Experimenter not fully
supporting self-references and bundled schema.
"""

import json
import sys
from argparse import ArgumentParser
from itertools import chain
from pathlib import Path
from typing import Any, Dict, List, NamedTuple, Union
from urllib.parse import urlparse

import jsonschema


class SchemaDefinition(NamedTuple):
    """A definition of a schema that is to be bundled."""

    #: The $id of the generated schema.
    schema_id: str

    #: The path of the generated schema.
    schema_path: Path

    #: The message types that will be bundled into the schema, as a mapping of
    #: message type name to the path of that message type's schema.
    message_types: Dict[str, Path]

    #: What common definitions to bundle into the schema.
    #:
    #: If `True`, all definitions will be bundled.
    #: If `False`, no definitions will be bundled.
    #: If a list, only the named definitions will be bundled.
    bundle_common: Union[bool, List[str]]

    #: The testing corpus for the schema, as a mapping of provider name to the
    #: path of a JSON file containing that provider's messages.
    test_corpus: Dict[str, Path]


# Directory holding the per-message-type schemas. This is a relative path, so
# it (like every other path below) is resolved against the current working
# directory when it is opened.
SCHEMA_DIR = Path("..", "templates")

# The bundled schemas to generate. main() processes each entry in order.
SCHEMAS = [
    SchemaDefinition(
        schema_id="resource://activity-stream/schemas/MessagingExperiment.schema.json",
        schema_path=Path("MessagingExperiment.schema.json"),
        message_types={
            "CFRUrlbarChiclet": (
                SCHEMA_DIR / "CFR" / "templates" / "CFRUrlbarChiclet.schema.json"
            ),
            "ExtensionDoorhanger": (
                SCHEMA_DIR / "CFR" / "templates" / "ExtensionDoorhanger.schema.json"
            ),
            "InfoBar": SCHEMA_DIR / "CFR" / "templates" / "InfoBar.schema.json",
            "NewtabPromoMessage": (
                SCHEMA_DIR / "PBNewtab" / "NewtabPromoMessage.schema.json"
            ),
            "ProtectionsPanelMessage": (
                SCHEMA_DIR / "OnboardingMessage" / "ProtectionsPanelMessage.schema.json"
            ),
            "Spotlight": SCHEMA_DIR / "OnboardingMessage" / "Spotlight.schema.json",
            "ToastNotification": (
                SCHEMA_DIR / "ToastNotification" / "ToastNotification.schema.json"
            ),
            "ToolbarBadgeMessage": (
                SCHEMA_DIR / "OnboardingMessage" / "ToolbarBadgeMessage.schema.json"
            ),
            "UpdateAction": (
                SCHEMA_DIR / "OnboardingMessage" / "UpdateAction.schema.json"
            ),
            "WhatsNewMessage": (
                SCHEMA_DIR / "OnboardingMessage" / "WhatsNewMessage.schema.json"
            ),
        },
        bundle_common=True,
        test_corpus={
            "ReachExperiments": Path("corpus", "ReachExperiments.messages.json"),
            # These are generated via extract-test-corpus.js
            "CFRMessageProvider": Path("corpus", "CFRMessageProvider.messages.json"),
            "OnboardingMessageProvider": Path(
                "corpus", "OnboardingMessageProvider.messages.json"
            ),
            "PanelTestProvider": Path("corpus", "PanelTestProvider.messages.json"),
        },
    ),
    SchemaDefinition(
        schema_id=(
            "resource://activity-stream/schemas/"
            "BackgroundTaskMessagingExperiment.schema.json"
        ),
        schema_path=Path("BackgroundTaskMessagingExperiment.schema.json"),
        message_types={
            "ToastNotification": (
                SCHEMA_DIR / "ToastNotification" / "ToastNotification.schema.json"
            ),
        },
        bundle_common=True,
        # These are generated via extract-test-corpus.js
        test_corpus={
            # Just the "toast_notification" messages.
            "PanelTestProvider": Path(
                "corpus", "PanelTestProvider_toast_notification.messages.json"
            ),
        },
    ),
]

# File name of the common schema. patch_schema() matches `file:` $refs against
# this name, and bundle_schema() reads the definitions to bundle from
# COMMON_SCHEMA_PATH.
COMMON_SCHEMA_NAME = "FxMSCommon.schema.json"
COMMON_SCHEMA_PATH = Path(COMMON_SCHEMA_NAME)


class NestedRefResolver(jsonschema.RefResolver):
    """A ref resolver that knows about the sub-schemas of a bundled schema.

    The top-level schema and each of its direct `$defs` entries are registered
    in the resolver's store under their own `$id` (entries without an `$id`
    are skipped).

    This is the resolver used by Experimenter.
    """

    def __init__(self, schema):
        super().__init__(base_uri=None, referrer=None)

        # The top-level schema goes first so that, should a definition reuse
        # its `$id`, the definition is the one that ends up in the store.
        bundled = schema["$defs"].values() if "$defs" in schema else ()
        for candidate in (schema, *bundled):
            if "$id" in candidate:
                self.store[candidate["$id"]] = candidate


def read_schema(path):
    """Read a schema from disk and parse it as JSON.

    The file is always decoded as UTF-8 rather than with the platform's
    default encoding, so the result does not depend on the machine's locale.

    Args:
        path: The `pathlib.Path` of the schema file to read.

    Returns:
        The parsed JSON document.
    """
    with path.open("r", encoding="utf-8") as f:
        return json.load(f)


def extract_template_values(template):
    """Extract the possible template values (either via JSON Schema enum or const).

    A non-empty `enum` takes precedence over `const`.

    Args:
        template: The sub-schema describing a message's `template` property.

    Returns:
        A list of the values the template may take. The list is empty when the
        sub-schema has neither a non-empty `enum` nor a `const`, so callers can
        always iterate over the result.
    """
    enum = template.get("enum")
    if enum:
        return enum

    # Test for the presence of the key rather than the truthiness of its
    # value, so that a falsy const is still reported.
    if "const" in template:
        return [template["const"]]

    return []


def patch_schema(schema, bundled_id, schema_id=None):
    """Patch the given schema.

    The JSON schema validator that Experimenter uses
    (https://pypi.org/project/jsonschema/) does not support relative references,
    nor does it support bundled schemas. We rewrite the schema so that all
    relative refs are transformed into absolute refs via the schema's `$id`.

    Additionally, we merge in the contents of FxMSCommon.schema.json, so all
    refs relative to that schema will be transformed to become relative to this
    schema.

    Only the following keywords are traversed when looking for `$ref`s:
    `properties`, `items`, `if`, `then`, `else`, `not`, `oneOf`, `allOf` and
    `anyOf`, starting from the top-level schema and from each entry of its
    top-level `$defs` / `definitions`. Boolean sub-schemas (`true` / `false`)
    are left untouched.

    See-also: https://github.com/python-jsonschema/jsonschema/issues/313

    Args:
        schema: The schema to patch. It is modified in place.
        bundled_id: The `$id` of the schema that the common definitions are
            being bundled into.
        schema_id: The `$id` that fragment-only refs are resolved against.
            Defaults to the `$id` of `schema`.

    Returns:
        The same `schema` object that was passed in.

    Raises:
        KeyError: If `schema_id` is not given and `schema` has no `$id`.
    """
    if schema_id is None:
        schema_id = schema["$id"]

    def patch_impl(subschema):
        # Boolean schemas have no keywords, hence nothing to patch.
        if not isinstance(subschema, dict):
            return

        ref = subschema.get("$ref")

        if ref:
            uri = urlparse(ref)
            if (
                uri.scheme == ""
                and uri.netloc == ""
                and uri.path == ""
                and uri.fragment != ""
            ):
                # A fragment-only ref (e.g. `#/$defs/foo`) points into the
                # schema itself; make it absolute.
                subschema["$ref"] = f"{schema_id}#{uri.fragment}"
            elif (uri.scheme, uri.path) == ("file", f"/{COMMON_SCHEMA_NAME}"):
                # A ref into the common schema; point it at the bundle, which
                # is where the common definitions will live.
                subschema["$ref"] = f"{bundled_id}#{uri.fragment}"

        # If `subschema` is object-like, inspect each of its individual
        # properties and patch them.
        properties = subschema.get("properties")
        if properties:
            for prop in properties.values():
                patch_impl(prop)

        # If `subschema` is array-like, patch its items. `items` is either a
        # single schema or an array of schemas.
        items = subschema.get("items")
        if isinstance(items, list):
            for item in items:
                patch_impl(item)
        elif items:
            patch_impl(items)

        # Patch each `if`, `then`, `else`, and `not` sub-schema that is present.
        for key in ("if", "then", "else", "not"):
            if key in subschema:
                patch_impl(subschema[key])

        # Patch the items of each `oneOf`, `allOf`, and `anyOf` sub-schema that
        # is present.
        for key in ("oneOf", "allOf", "anyOf"):
            for alternate in subschema.get(key) or ():
                patch_impl(alternate)

    # Patch the top-level type defined in the schema.
    patch_impl(schema)

    # Patch each named definition in the schema.
    for key in ("$defs", "definitions"):
        defns = schema.get(key)
        if defns:
            for defn in defns.values():
                patch_impl(defn)

    return schema


def bundle_schema(schema_def: SchemaDefinition):
    """Create a bundled schema based on the schema definition.

    Each message type schema is read and patched (see `patch_schema`), the
    requested definitions from FxMSCommon.schema.json are merged in alongside
    them, and `TemplatedMessage` and `MultiMessage` definitions are generated
    that dispatch on a message's `template` value.

    Args:
        schema_def: The definition of the schema to generate.

    Returns:
        The bundled schema, as a dict.

    Raises:
        ValueError: If a message type schema has no `$id`, has no `template`
            property, has a `template` property that is neither a `const` nor
            an `enum`, or shares a template value with another message type.
        KeyError: If a message type schema has no `properties`, or if no
            `Message` definition with a `template` property was bundled from
            the common schema.
    """
    # Patch each message type schema to resolve all self-references to be
    # absolute and rewrite references to FxMSCommon.schema.json to be relative
    # to the new schema (because we are about to bundle its definitions).
    defs = {
        name: patch_schema(read_schema(path), bundled_id=schema_def.schema_id)
        for name, path in schema_def.message_types.items()
    }

    # Bundle the definitions from FxMSCommon.schema.json into this schema.
    if schema_def.bundle_common:
        # patch_schema mutates the given schema, so we read a new copy in for
        # each bundle operation.
        common_schema = patch_schema(
            read_schema(COMMON_SCHEMA_PATH),
            bundled_id=schema_def.schema_id,
            schema_id=schema_def.schema_id,
        )

        # `True` means "bundle everything"; otherwise bundle_common is the
        # list of definition names to keep.
        bundle_all = schema_def.bundle_common is True
        defs.update(
            {
                name: dfn
                for name, dfn in common_schema["$defs"].items()
                if bundle_all or name in schema_def.bundle_common
            }
        )

    # Ensure all bundled schemas have an $id so that $refs inside the
    # bundled schema work correctly (i.e, they will reference the subschema
    # and not the bundle), and that each declares its template value(s).
    for name in schema_def.message_types:
        subschema = defs[name]
        if "$id" not in subschema:
            raise ValueError(f"Schema {name} is missing an $id")

        props = subschema["properties"]
        if "template" not in props:
            raise ValueError(f"Schema {name} is missing a template")

        template = props["template"]
        if "enum" not in template and "const" not in template:
            raise ValueError(f"Schema {name} should have const or enum template")

    templates = {
        name: extract_template_values(defs[name]["properties"]["template"])
        for name in schema_def.message_types
    }

    # Ensure that each schema has a unique set of template values. Overlap is
    # symmetric, so each pair of schemas only needs to be compared once.
    names = list(templates)
    for i, a in enumerate(names):
        a_values = set(templates[a])

        for b in names[i + 1 :]:
            overlap = a_values.intersection(templates[b])

            if overlap:
                # Stringify and sort so that the message is deterministic and
                # building it cannot fail on non-string template values.
                raise ValueError(
                    f"Schema {a} and {b} have overlapping template values: "
                    f"{', '.join(sorted(map(str, overlap)))}"
                )

    all_templates = list(chain.from_iterable(templates.values()))

    # Enforce that one of the templates must match (so that one of the if
    # branches will match).
    defs["Message"]["properties"]["template"]["enum"] = all_templates
    defs["TemplatedMessage"] = {
        "description": "An FxMS message of one of a variety of types.",
        "type": "object",
        "allOf": [
            # Ensure each message has all the fields defined in the base
            # Message type.
            #
            # This is slightly redundant because each message should
            # already inherit from this message type, but it is easier
            # to add this requirement here than to verify that each
            # message's schema is properly inheriting.
            {"$ref": f"{schema_def.schema_id}#/$defs/Message"},
            # For each message type, create a subschema that says if the
            # template field matches a value for a message type defined
            # in `schema_def.message_types`, then the message must also
            # match the schema for that message type.
            #
            # This is done using `allOf: [{ if, then }]` instead of `oneOf: []`
            # because it provides better error messages. Using `if-then`
            # will only show validation errors for the sub-schema that
            # matches template, whereas using `oneOf` will show
            # validation errors for *all* sub-schemas, which makes
            # debugging messages much harder.
            *(
                {
                    "if": {
                        "type": "object",
                        "properties": {
                            "template": {
                                "type": "string",
                                "enum": templates[message_type],
                            },
                        },
                        "required": ["template"],
                    },
                    "then": {"$ref": f"{schema_def.schema_id}#/$defs/{message_type}"},
                }
                for message_type in schema_def.message_types
            ),
        ],
    }
    defs["MultiMessage"] = {
        "description": "An object containing an array of messages.",
        "type": "object",
        "properties": {
            "template": {"type": "string", "const": "multi"},
            "messages": {
                "type": "array",
                "description": "An array of messages.",
                "items": {"$ref": f"{schema_def.schema_id}#/$defs/TemplatedMessage"},
            },
        },
        "required": ["template", "messages"],
    }

    # Generate the combined schema.
    return {
        "$schema": "https://json-schema.org/draft/2019-09/schema",
        "$id": schema_def.schema_id,
        "title": "Messaging Experiment",
        "description": "A Firefox Messaging System message.",
        # A message is validated as one of:
        # - A MultiMessage, when it is an object whose template is "multi"
        # - A TemplatedMessage, in every other case
        "if": {
            "type": "object",
            "properties": {"template": {"const": "multi"}},
            "required": ["template"],
        },
        "then": {
            "$ref": f"{schema_def.schema_id}#/$defs/MultiMessage",
        },
        "else": {
            "$ref": f"{schema_def.schema_id}#/$defs/TemplatedMessage",
        },
        "$defs": defs,
    }


def check_diff(schema_def: SchemaDefinition, schema: Dict[str, Any]):
    """Check the generated schema matches the on-disk schema.

    On a mismatch both schemas are printed to stdout before raising.

    Args:
        schema_def: The definition whose `schema_path` names the on-disk
            schema to compare against.
        schema: The freshly generated schema.

    Raises:
        ValueError: If the on-disk schema differs from the generated one.
    """
    print(f"  Checking {schema_def.schema_path} for differences...")

    # The schema is written out as UTF-8 by main(), so read it back the same
    # way instead of relying on the platform's default encoding.
    with schema_def.schema_path.open("r", encoding="utf-8") as f:
        on_disk = json.load(f)

    if on_disk != schema:
        print(f"{schema_def.schema_path} does not match generated schema:")
        print("Generated schema:")
        json.dump(schema, sys.stdout, indent=2)
        print("\n\nOn Disk schema:")
        json.dump(on_disk, sys.stdout, indent=2)
        print("\n\n")

        raise ValueError("Schemas do not match!")


def validate_corpus(schema_def: SchemaDefinition, schema: Dict[str, Any]):
    """Check that the schema validates.

    Every message of every file in the schema definition's test corpus is
    validated against `schema`.

    This uses the same validation configuration that is used in Experimenter.

    Args:
        schema_def: The definition whose `test_corpus` is validated.
        schema: The generated schema to validate the messages against.

    Raises:
        FileNotFoundError: If a corpus file is missing but its directory exists.
        Exception: If a corpus file is missing and so is its directory, with a
            hint about how to generate the corpus.

    Any error raised by `jsonschema.validate` for a message propagates.
    """
    print("  Validating messages with Experimenter JSON Schema validator...")

    resolver = NestedRefResolver(schema)

    for provider, provider_path in schema_def.test_corpus.items():
        print(f"    Validating messages from {provider}:")

        try:
            # Decode explicitly as UTF-8 instead of relying on the platform's
            # default encoding.
            with provider_path.open("r", encoding="utf-8") as f:
                messages = json.load(f)
        except FileNotFoundError as e:
            # A missing corpus directory most likely means the corpus was
            # never extracted, so point at the command that generates it.
            if not provider_path.parent.exists():
                raise Exception(
                    f"Could not find {provider_path}: Did you run "
                    "`mach xpcshell extract-test-corpus.js` ?"
                ) from e

            raise

        for i, message in enumerate(messages):
            template = message.get("template", "(no template)")
            msg_id = message.get("id", f"index {i}")

            print(
                f"      Validating {msg_id} {template} message with {schema_def.schema_path}..."
            )
            jsonschema.validate(instance=message, schema=schema, resolver=resolver)

        print()


def main(check=False):
    """Generate Nimbus feature schemas for Firefox Messaging System."""
    # NOTE: the docstring above doubles as the command line description, so
    # any change to it changes the `--help` output.
    for schema_def in SCHEMAS:
        target = schema_def.schema_path
        print(f"Generating {target} ...")
        bundled = bundle_schema(schema_def)

        if not check:
            # Generation mode: (over)write the schema on disk, as UTF-8 JSON
            # followed by a trailing newline.
            with target.open("wb") as out:
                print(f"Writing {target} ...")
                out.write(json.dumps(bundled, indent=2).encode("utf-8") + b"\n")
            continue

        # Check mode: nothing is written; the generated schema must match the
        # one on disk and must validate the test corpus.
        print(f"Checking {target} ...")
        check_diff(schema_def, bundled)
        validate_corpus(schema_def, bundled)


if __name__ == "__main__":
    # Command line entry point: `--check` verifies the schemas instead of
    # regenerating them.
    cli = ArgumentParser(description=main.__doc__)
    cli.add_argument(
        "--check",
        default=False,
        action="store_true",
        help="Check that the generated schemas have not changed and run validation tests.",
    )

    main(cli.parse_args().check)